set.seed(params$seed)
library(funModeling)
library(tidyverse)
library(Hmisc)
library(biomaRt)
library(survminer)
library(glmnet)
library(glmSparseNet)
library(PRROC)
library(propagate)
library(lsa)
library(edgeR)
library(limma)
library(Glimma)
library(gplots)
library(DESeq2)
library(RColorBrewer)
library(GEOquery)
library(tibble)
library ( DESeq2 )
library(NMF)
library(ISLR)
library(tree)
library(readxl)
# library(ggbiplot)
library(caret)
library(rpart)
library(rpart.plot)
library(futile.logger)
library(ggpubr)
library(rstatix)
library(writexl)
library(pROC)
# Load the first RNA-seq table: one row per gene (Ensembl ID in the first,
# unnamed column, which readxl labels "...1") and one column per sample.
rnaseq1 <- read_excel("data/rnaseq_LCosta.xlsx")
rnaseq1 <- as.data.frame(rnaseq1)
# Ask Ensembl BioMart for the HGNC symbol of every Ensembl gene ID in the table.
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
hgnc_swissprot <- getBM(
  attributes = c("ensembl_gene_id", "hgnc_symbol"),
  filters = "ensembl_gene_id",
  values = rnaseq1$...1,
  mart = ensembl
)
hgnc_swissprot[1:3, 1:2]
## ensembl_gene_id hgnc_symbol
## 1 ENSG00000000003 TSPAN6
## 2 ENSG00000000005 TNMD
## 3 ENSG00000000419 DPM1
# Keep a single symbol per Ensembl ID, then keep only the genes that were mapped.
hgnc_swissprot <- hgnc_swissprot[!duplicated(hgnc_swissprot$ensembl_gene_id), ]
rnaseq1 <- rnaseq1[rnaseq1$...1 %in% hgnc_swissprot$ensembl_gene_id, ]
# Attach symbols by ID lookup instead of by position: the BioMart result is
# not guaranteed to come back in the same row order as the count table, and a
# positional assignment would silently mislabel genes.
rnaseq1$genes <- hgnc_swissprot$hgnc_symbol[
  match(rnaseq1$...1, hgnc_swissprot$ensembl_gene_id)
]
# Keep the first row for every symbol (this also collapses all unmapped genes,
# whose symbol is "", into a single row that is removed below).
rnaseq1 <- rnaseq1[!duplicated(rnaseq1$genes), ]
which(is.na(rnaseq1$genes))
## integer(0)
# rnaseq <- rnaseq[-38532,]
rownames(rnaseq1) <- rnaseq1$genes
# Drop the ID and symbol columns by name rather than by hard-coded position.
rnaseq1 <- rnaseq1[, !(colnames(rnaseq1) %in% c("...1", "genes")), drop = FALSE]
# Transpose so that samples are rows and genes are columns.
rnaseq1 <- t(rnaseq1)
which(colnames(rnaseq1) == "")
# Remove the column of genes without an HGNC symbol, wherever it sits.
rnaseq1 <- rnaseq1[, colnames(rnaseq1) != "", drop = FALSE]
rnaseq1 <- as.data.frame(rnaseq1)
# Keep the sample name as a regular column; it is used later to de-duplicate
# samples after the two tables are stacked.
rnaseq1$row <- rownames(rnaseq1)
dim(rnaseq1)
## [1] 99 39475
# Load the second RNA-seq table: one row per gene (Ensembl ID in the first,
# unnamed column, which readxl labels "...1") and one column per sample.
rnaseq2 <- read_excel("data/rnaseq_illumina.xlsx")
rnaseq2 <- as.data.frame(rnaseq2)
# Ask Ensembl BioMart for the HGNC symbol of every Ensembl gene ID in the table.
ensembl <- useEnsembl(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
hgnc_swissprot <- getBM(
  attributes = c("ensembl_gene_id", "hgnc_symbol"),
  filters = "ensembl_gene_id",
  values = rnaseq2$...1,
  mart = ensembl
)
hgnc_swissprot[1:3, 1:2]
## ensembl_gene_id hgnc_symbol
## 1 ENSG00000000003 TSPAN6
## 2 ENSG00000000005 TNMD
## 3 ENSG00000000419 DPM1
# Keep a single symbol per Ensembl ID, then keep only the genes that were mapped.
hgnc_swissprot <- hgnc_swissprot[!duplicated(hgnc_swissprot$ensembl_gene_id), ]
rnaseq2 <- rnaseq2[rnaseq2$...1 %in% hgnc_swissprot$ensembl_gene_id, ]
# Attach symbols by ID lookup instead of by position: the BioMart result is
# not guaranteed to come back in the same row order as the count table, and a
# positional assignment would silently mislabel genes.
rnaseq2$genes <- hgnc_swissprot$hgnc_symbol[
  match(rnaseq2$...1, hgnc_swissprot$ensembl_gene_id)
]
# Keep the first row for every symbol (this also collapses all unmapped genes,
# whose symbol is "", into a single row that is removed below).
rnaseq2 <- rnaseq2[!duplicated(rnaseq2$genes), ]
which(is.na(rnaseq2$genes))
## integer(0)
# rnaseq <- rnaseq[-38532,]
rownames(rnaseq2) <- rnaseq2$genes
# Drop the ID and symbol columns by name rather than by hard-coded position.
rnaseq2 <- rnaseq2[, !(colnames(rnaseq2) %in% c("...1", "genes")), drop = FALSE]
# Transpose so that samples are rows and genes are columns.
rnaseq2 <- t(rnaseq2)
which(colnames(rnaseq2) == "")
# Remove the column of genes without an HGNC symbol, wherever it sits.
rnaseq2 <- rnaseq2[, colnames(rnaseq2) != "", drop = FALSE]
rnaseq2 <- as.data.frame(rnaseq2)
# Keep the sample name as a regular column; it is used later to de-duplicate
# samples after the two tables are stacked.
rnaseq2$row <- rownames(rnaseq2)
dim(rnaseq2)
## [1] 86 39475
# Stack both expression tables (samples in rows) and keep the first occurrence
# of every sample name.
rnaseq <- rbind(rnaseq1, rnaseq2)
rnaseq1 <- rnaseq[!duplicated(rnaseq$row), ]
# Drop the helper sample-name column by name instead of by the hard-coded
# position 39475, which silently removes a gene if the gene count ever changes.
rnaseq1 <- rnaseq1[, colnames(rnaseq1) != "row", drop = FALSE]
rnaseq1 <- rnaseq1[order(row.names(rnaseq1)), ]
# Read the clinical table as a plain data frame: row names on a tibble are
# deprecated and are dropped again as soon as the tibble is subset.
DATASET1 <- as.data.frame(read_excel("data/DATASET1NEW_illumina.xlsx"))
DATASET1 <- DATASET1[order(DATASET1$ID), ]
rownames(DATASET1) <- DATASET1$ID
# Keep only the samples present in both the expression and the clinical table.
rnaseq1 <- as.data.frame(rnaseq1[rownames(rnaseq1) %in% rownames(DATASET1), ])
DATASET1 <- as.data.frame(DATASET1[rownames(DATASET1) %in% rownames(rnaseq1), ])
# Clinical covariates only: columns 1, 8 and 9 are removed here (column 1 is
# the sample ID; columns 8:9 are extracted further down as the survival data).
clinic1 <- DATASET1[, -c(1, 8, 9)]
rownames(clinic1) <- DATASET1$ID
# Fix the RNG state so the random draws below are reproducible.
set.seed(2010)
# Class proportions before balancing
prop.table(table(DATASET1$class))
##
## P Pm
## 0.7272727 0.2727273
# Split the clinical table by class.
df_p <- DATASET1[DATASET1$class %in% "P", ]
df_pm <- DATASET1[DATASET1$class %in% "Pm", ]
# Under-sample the majority class "P" into three disjoint groups so that each
# group, combined with all "Pm" samples, is roughly balanced.
# First draw: `params$nsample` randomly chosen "P" samples.
nsample <- params$nsample
pick_negative <- sample(df_p$ID, nsample)
df_p1f <- subset(df_p, ID %in% pick_negative)
df_p2 <- df_p[!(df_p$ID %in% pick_negative), ]
# Second draw: 25 of the remaining "P" samples; whatever is left forms the
# third group.
nsample <- 25
pick_negative <- sample(df_p2$ID, nsample)
df_p2f <- subset(df_p2, ID %in% pick_negative)
df_p3f <- df_p2[!(df_p2$ID %in% pick_negative), ]
# Each balanced data set = one "P" group plus every "Pm" sample.
df1 <- rbind(df_p1f, df_pm)
df2 <- rbind(df_p2f, df_pm)
df3 <- rbind(df_p3f, df_pm)
dim(df1)
## [1] 60 9
dim(df2)
## [1] 55 9
dim(df3)
## [1] 55 9
table(df1$class)
##
## P Pm
## 30 30
table(df2$class)
##
## P Pm
## 25 30
table(df3$class)
##
## P Pm
## 25 30
# For every balanced set, separate the clinical covariates (all columns except
# 1, 8 and 9) from columns 8:9, and index both tables by sample ID.
clinic1_bal <- df1[, setdiff(seq_along(df1), c(1, 8, 9))]
datasurv1 <- as.data.frame(df1[, c(8, 9)])
row.names(clinic1_bal) <- df1$ID
row.names(datasurv1) <- df1$ID
clinic2_bal <- df2[, setdiff(seq_along(df2), c(1, 8, 9))]
datasurv2 <- as.data.frame(df2[, c(8, 9)])
row.names(clinic2_bal) <- df2$ID
row.names(datasurv2) <- df2$ID
clinic3_bal <- df3[, setdiff(seq_along(df3), c(1, 8, 9))]
datasurv3 <- as.data.frame(df3[, c(8, 9)])
row.names(clinic3_bal) <- df3$ID
row.names(datasurv3) <- df3$ID
# Restore a previously saved workspace. load() places every object stored in
# the file into the current environment and silently replaces any existing
# object of the same name, so objects built above may be overwritten here.
# NOTE(review): `xdataT`, used further down, is never assigned in the visible
# code (its construction is commented out below), so it presumably comes from
# this file - confirm. The absolute home-directory path also ties the script to
# one machine.
load("~/CRC_LCosta/results/results1_2010_final.RData")
# From here on the analysis runs on the first balanced subset: the full
# clinical tables are replaced by their balanced counterparts.
DATASET1_bal <- df1
DATASET1 <- DATASET1_bal
clinic1 <- clinic1_bal
# primM <- DATASET1 %>%
# filter(str_detect(class, "m"))
# rownames(primM) <- primM$ID
# primN <- DATASET1 %>%
# filter(!str_detect(class, "Pm"))
# rownames(primN) <- primN$ID
#
# clinic_prim <- DATASET1
#
# rnaprimM <- rnaseq1[rownames(rnaseq1) %in%
# rownames(primM),]
#
# rnaprimN <- rnaseq1[rownames(rnaseq1) %in%
# rownames(primN),]
#
# rnaprim <- rbind(rnaprimM,rnaprimN)
# xmet <- rnaprimM [,sapply(seq(ncol(rnaprimM)), function(ix) {sd(rnaprimM[,ix])}) != 0]
# xnon <- rnaprimN[,sapply(seq(ncol(rnaprimN)), function(ix) {sd(rnaprimN[,ix])}) != 0]
#
# xmet_less <- xmet[,which(colnames(xmet) %in% colnames(xnon))]
# xnon_less <- xnon[,which(colnames(xnon) %in% colnames(xmet))]
#
# # normalizing data
# xmet_norm <- scale(log2(xmet_less+1))
# xnon_norm <- scale(log2(xnon_less+1))
#
# xdataT <- rbind(xmet_less,xnon_less)
# xdataT <- xdataT[, !sapply(xdataT, function(x) { sd(x) == 0} )]
# xdataT <- xdataT[ order(row.names(xdataT)), ]
#
#
#
# rm(xmet,xmet_less,xnon,xnon_less,rnaprimM,rnaprimN)
# #xmet_cor <- Matrix(cor(xmet_norm), sparse = TRUE)
# xmet_cor <- cor(xmet_norm)
# #xmet_cor <- as.data.frame(xmet_cor)
# xnon_cor <- cor(xnon_norm)
# #xnon_cor <- as.data.frame(xnon_cor)
#
# # angular distance
# ang_weight <- vector()
# for (i in 1:dim(xmet_cor)[2]){
# ang_weight[i] <- acos(cosine(xmet_cor[,i],xnon_cor[,i]))/pi
# }
#
# ## normalized weights
#
# weights <- ang_weight / max(ang_weight)
# hist(weights,main="w")
#
#
# pen_weight1 <- 1 / weights
# hist(pen_weight1, main="1 / w")
#
# rm(xmet_cor,xnon_cor)
clinical <- as.data.frame(clinic1_bal)

# Print a quick exploratory overview of a clinical table.
#
# clinical: data frame with one row per sample.
# Returns the result of describe() for the table, which is auto-printed when
# the function is called at top level.
basic_eda <- function(clinical) {
  # Column names, types and first values.
  glimpse(clinical)
  # df_Status(clinical)
  # Frequency table for each categorical column.
  freq(clinical)
  # A bare expression is not auto-printed inside a function, so the numeric
  # profile has to be printed explicitly or it is silently discarded.
  print(profiling_num(clinical))
  plot_num(clinical)
  # Describe the argument itself; the previous version described the global
  # `clinic1`, ignoring whatever table was passed in.
  describe(clinical)
}
basic_eda(clinical)
## Rows: 60
## Columns: 6
## $ class <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", …
## $ organ <chr> "rectum", "colon", "rectum", "colon", "colon", "colon", "col…
## $ Sex <chr> "m", "f", "m", "f", "f", "m", "f", "m", "m", "f", "f", "f", …
## $ Age <dbl> 78, 74, 65, 79, 74, 52, 57, 63, 86, 69, 40, 65, 60, 65, 84, …
## $ Stage <chr> "III", "II", "II", "III", "II", "III", "III", "II", "II", "I…
## $ sidedness <chr> "rectum", "right", "rectum", "left", "left", "left", "right"…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 P 30 50 50
## 2 Pm 30 50 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 53 88.33 88.33
## 2 rectum 7 11.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 37 61.67 61.67
## 2 m 23 38.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 II 32 53.33 53.33
## 2 III 28 46.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 left 26 43.33 43.33
## 2 right 23 38.33 81.66
## 3 rectum 7 11.67 93.33
## 4 <NA> 4 6.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinic1
##
## 6 Variables 60 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct
## 60 0 2
##
## Value P Pm
## Frequency 30 30
## Proportion 0.5 0.5
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 60 0 2
##
## Value colon rectum
## Frequency 53 7
## Proportion 0.883 0.117
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 60 0 2
##
## Value f m
## Frequency 37 23
## Proportion 0.617 0.383
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 47 13 27 0.998 68.55 14.37 48.5 54.8
## .25 .50 .75 .90 .95
## 60.0 69.0 78.5 85.0 86.0
##
## lowest : 37 40 47 52 53, highest: 84 85 86 87 91
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 60 0 2
##
## Value II III
## Frequency 32 28
## Proportion 0.533 0.467
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 56 4 3
##
## Value left rectum right
## Frequency 26 7 23
## Proportion 0.464 0.125 0.411
## --------------------------------------------------------------------------------
# Mean age over the samples of the balanced set that have a recorded age.
a <- na.omit(clinical[["Age"]])
mean(a)
## [1] 68.55319
# Samples whose class does not contain "Pm", i.e. the "P" group.
clinical_p <- clinic1_bal %>%
  filter(!str_detect(class, "Pm"))

# Print a quick exploratory overview of a clinical table.
#
# clinical_p: data frame with one row per sample.
# Returns the result of describe() for the table, which is auto-printed when
# the function is called at top level.
basic_eda <- function(clinical_p) {
  # Column names, types and first values.
  glimpse(clinical_p)
  # df_Status(clinical_p)
  # Frequency table for each categorical column.
  freq(clinical_p)
  # A bare expression is not auto-printed inside a function, so the numeric
  # profile has to be printed explicitly or it is silently discarded.
  print(profiling_num(clinical_p))
  plot_num(clinical_p)
  describe(clinical_p)
}
basic_eda(clinical_p)
## Rows: 30
## Columns: 6
## $ class <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", …
## $ organ <chr> "rectum", "colon", "rectum", "colon", "colon", "colon", "col…
## $ Sex <chr> "m", "f", "m", "f", "f", "m", "f", "m", "m", "f", "f", "f", …
## $ Age <dbl> 78, 74, 65, 79, 74, 52, 57, 63, 86, 69, 40, 65, 60, 65, 84, …
## $ Stage <chr> "III", "II", "II", "III", "II", "III", "III", "II", "II", "I…
## $ sidedness <chr> "rectum", "right", "rectum", "left", "left", "left", "right"…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 P 30 100 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 28 93.33 93.33
## 2 rectum 2 6.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 20 66.67 66.67
## 2 m 10 33.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 II 22 73.33 73.33
## 2 III 8 26.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 right 14 46.67 46.67
## 2 left 13 43.33 90.00
## 3 rectum 2 6.67 96.67
## 4 <NA> 1 3.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinical_p
##
## 6 Variables 30 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct value
## 30 0 1 P
##
## Value P
## Frequency 30
## Proportion 1
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 30 0 2
##
## Value colon rectum
## Frequency 28 2
## Proportion 0.933 0.067
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 30 0 2
##
## Value f m
## Frequency 20 10
## Proportion 0.667 0.333
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 21 9 15 0.995 68.29 15.46 47 52
## .25 .50 .75 .90 .95
## 60 65 79 84 86
##
## lowest : 40 47 52 57 60, highest: 79 83 84 86 91
##
## Value 40 47 52 57 60 63 65 69 74 78 79
## Frequency 1 1 1 1 2 2 3 1 2 1 2
## Proportion 0.048 0.048 0.048 0.048 0.095 0.095 0.143 0.048 0.095 0.048 0.095
##
## Value 83 84 86 91
## Frequency 1 1 1 1
## Proportion 0.048 0.048 0.048 0.048
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 30 0 2
##
## Value II III
## Frequency 22 8
## Proportion 0.733 0.267
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 29 1 3
##
## Value left rectum right
## Frequency 13 2 14
## Proportion 0.448 0.069 0.483
## --------------------------------------------------------------------------------
# Mean age over the "P" samples that have a recorded age.
a <- na.omit(clinical_p[["Age"]])
mean(a)
## [1] 68.28571
# Samples whose class contains "Pm".
clinical_Pm <- clinic1_bal %>%
  filter(str_detect(class, "Pm"))

# Print a quick exploratory overview of a clinical table.
#
# clinical_Pm: data frame with one row per sample.
# Returns the result of describe() for the table, which is auto-printed when
# the function is called at top level.
basic_eda <- function(clinical_Pm) {
  # Column names, types and first values.
  glimpse(clinical_Pm)
  # df_Status(clinical_Pm)
  # Frequency table for each categorical column.
  freq(clinical_Pm)
  # A bare expression is not auto-printed inside a function, so the numeric
  # profile has to be printed explicitly or it is silently discarded.
  print(profiling_num(clinical_Pm))
  plot_num(clinical_Pm)
  describe(clinical_Pm)
}
basic_eda(clinical_Pm)
## Rows: 30
## Columns: 6
## $ class <chr> "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", …
## $ organ <chr> "rectum", "colon", "rectum", "colon", "colon", "colon", "rec…
## $ Sex <chr> "f", "f", "f", "m", "f", "f", "m", "f", "f", "m", "m", "f", …
## $ Age <dbl> 58, 85, 62, 67, 75, 69, 58, 57, 72, 65, 61, 78, 81, 57, 74, …
## $ Stage <chr> "II", "II", "III", "III", "III", "II", "II", "III", "II", "I…
## $ sidedness <chr> "left", "right", "rectum", "rectum", "left", "left", "rectum…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 Pm 30 100 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 25 83.33 83.33
## 2 rectum 5 16.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 17 56.67 56.67
## 2 m 13 43.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 III 20 66.67 66.67
## 2 II 10 33.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 left 13 43.33 43.33
## 2 right 9 30.00 73.33
## 3 rectum 5 16.67 90.00
## 4 <NA> 3 10.00 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinical_Pm
##
## 6 Variables 30 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct value
## 30 0 1 Pm
##
## Value Pm
## Frequency 30
## Proportion 1
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 30 0 2
##
## Value colon rectum
## Frequency 25 5
## Proportion 0.833 0.167
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 30 0 2
##
## Value f m
## Frequency 17 13
## Proportion 0.567 0.433
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 26 4 18 0.997 68.77 13.91 53.75 56.50
## .25 .50 .75 .90 .95
## 58.75 69.00 77.25 85.00 85.75
##
## lowest : 37 53 56 57 58, highest: 78 81 85 86 87
##
## Value 37 53 56 57 58 61 62 65 67 69 72
## Frequency 1 1 1 2 2 1 1 1 1 3 2
## Proportion 0.038 0.038 0.038 0.077 0.077 0.038 0.038 0.038 0.038 0.115 0.077
##
## Value 74 75 78 81 85 86 87
## Frequency 2 1 1 2 2 1 1
## Proportion 0.077 0.038 0.038 0.077 0.077 0.038 0.038
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 30 0 2
##
## Value II III
## Frequency 10 20
## Proportion 0.333 0.667
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 27 3 3
##
## Value left rectum right
## Frequency 13 5 9
## Proportion 0.481 0.185 0.333
## --------------------------------------------------------------------------------
# Mean age over the "Pm" samples that have a recorded age.
a <- na.omit(clinical_Pm[["Age"]])
mean(a)
## [1] 68.76923
# Turn every character column of the balanced clinical table into a factor.
clinical_factor <- clinic1_bal %>%
  mutate_if(is.character, as.factor)
#clinical_factor <- na.omit(clinical_factor)
# Stacked proportion bars: composition of each class by organ, sex,
# sidedness and stage.
ggplot(data = clinical_factor) +
  geom_bar(mapping = aes(x = class, fill = organ), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))
ggplot(data = clinical_factor) +
  geom_bar(mapping = aes(x = class, fill = Sex), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))
# Sidedness is compared for left vs right only, so rectal tumours are removed
# (filter() also drops the rows where sidedness is missing).
clinical_factor1 <- clinical_factor %>%
  filter(!str_detect(sidedness, "rectum"))
ggplot(data = clinical_factor1) +
  geom_bar(mapping = aes(x = class, fill = sidedness), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))
ggplot(data = clinical_factor) +
  geom_bar(mapping = aes(x = class, fill = Stage), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4", "#000080"))
# Overlaid age histograms per class. labs() must be chained with `+`; as a
# separate statement it never reached the plot and only printed a stray
# labels object.
clinical_factor %>%
  ggplot(aes(x = Age, fill = class)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  labs(fill = "")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 13 rows containing non-finite values (stat_bin).
# Contingency table of class by organ, tested with Fisher's exact test.
stat_data_organ <- table(clinical_factor[["class"]], clinical_factor[["organ"]])
# TODO: make plot(...)
fisher.test(stat_data_organ)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_organ
## p-value = 0.4238
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4061343 31.3554016
## sample estimates:
## odds ratio
## 2.754099
# Contingency table of class by sex, tested with Fisher's exact test.
stat_data_sex <- table(clinical_factor[["class"]], clinical_factor[["Sex"]])
fisher.test(stat_data_sex)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_sex
## p-value = 0.5959
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4748001 4.9821718
## sample estimates:
## odds ratio
## 1.518518
# Contingency table of class by stage, tested with Fisher's exact test.
stat_data_stage <- table(clinical_factor[["class"]], clinical_factor[["Stage"]])
fisher.test(stat_data_stage)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_stage
## p-value = 0.004025
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.603346 19.445739
## sample estimates:
## odds ratio
## 5.327244
# Contingency table of class by sidedness (three levels), tested with
# Fisher's exact test.
stat_data_side <- table(
  clinical_factor[["class"]],
  clinical_factor[["sidedness"]]
)
fisher.test(stat_data_side)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_side
## p-value = 0.3185
## alternative hypothesis: two.sided
# Age distribution within each class, followed by the per-class five-number
# summary (mean and NA count included).
hist(clinical_factor$Age[clinical_factor$class == "P"])
hist(clinical_factor$Age[clinical_factor$class == "Pm"])
tapply(clinical_factor$Age, INDEX = clinical_factor$class, FUN = summary)
## $P
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 40.00 60.00 65.00 68.29 79.00 91.00 9
##
## $Pm
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 37.00 58.75 69.00 68.77 77.25 87.00 4
# Two-sample t-test comparing mean age between the two classes.
t.test(Age ~ class, data = clinical_factor)
##
## Welch Two Sample t-test
##
## data: Age by class
## t = -0.12846, df = 41.11, p-value = 0.8984
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.084572 7.117539
## sample estimates:
## mean in group P mean in group Pm
## 68.28571 68.76923
# Join the survival columns with the clinical covariates of the first balanced
# set; both tables use the sample ID as row names, so they are merged on those.
data <- merge(datasurv1, clinical, by="row.names")
# Kaplan-Meier survival curves, one per tumour stage.
fit <- survfit(Surv(time, Status) ~ Stage, data = data)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ Stage, data = data)
##
## n events median 0.95LCL 0.95UCL
## Stage=II 32 7 NA 1635 NA
## Stage=III 28 17 1157 797 NA
# Full life table per stage: one row per event time with the number at risk,
# the number of events, the survival estimate and its confidence bounds.
summary(fit)
## Call: survfit(formula = Surv(time, Status) ~ Stage, data = data)
##
## Stage=II
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 32 1 0.969 0.0308 0.910 1.000
## 400 30 1 0.936 0.0435 0.855 1.000
## 1357 16 1 0.878 0.0698 0.751 1.000
## 1428 14 1 0.815 0.0886 0.659 1.000
## 1461 13 1 0.753 0.1016 0.578 0.980
## 1635 11 1 0.684 0.1131 0.495 0.946
## 2011 10 1 0.616 0.1207 0.419 0.904
##
## Stage=III
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 216 28 1 0.964 0.0351 0.898 1.000
## 386 27 1 0.929 0.0487 0.838 1.000
## 420 26 1 0.893 0.0585 0.785 1.000
## 500 25 1 0.857 0.0661 0.737 0.997
## 518 24 1 0.821 0.0724 0.691 0.976
## 520 23 1 0.786 0.0775 0.648 0.953
## 573 22 1 0.750 0.0818 0.606 0.929
## 632 21 1 0.714 0.0854 0.565 0.903
## 682 20 1 0.679 0.0883 0.526 0.876
## 797 18 1 0.641 0.0911 0.485 0.847
## 807 17 1 0.603 0.0932 0.446 0.816
## 857 16 1 0.565 0.0947 0.407 0.785
## 1107 14 1 0.525 0.0961 0.367 0.752
## 1157 13 1 0.485 0.0969 0.328 0.717
## 1682 10 1 0.436 0.0986 0.280 0.679
## 1835 9 1 0.388 0.0988 0.235 0.639
## 3667 4 1 0.291 0.1120 0.137 0.619
# Access the short summary table (one row per stage).
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## Stage=II 32 32 32 7 3111.160 332.7298 NA 1635
## Stage=III 28 28 28 17 2088.959 321.2685 1157 797
## 0.95UCL
## Stage=II NA
## Stage=III NA
# Plot the curves; pval = TRUE requests a p-value annotation on the plot.
ggsurvplot(fit, data = data, pval = TRUE)
# Log-rank test for a difference in survival between the stages.
surv.stage <- survdiff(Surv(time,Status) ~ Stage, data = data)
surv.stage
## Call:
## survdiff(formula = Surv(time, Status) ~ Stage, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Stage=II 32 7 13.3 3.01 6.8
## Stage=III 28 17 10.7 3.76 6.8
##
## Chisq= 6.8 on 1 degrees of freedom, p= 0.009
# Kaplan-Meier survival curves, one per class ("P" vs "Pm"). This overwrites
# the `fit` object of the previous analysis.
fit <- survfit(Surv(time, Status) ~ class, data = data)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ class, data = data)
##
## n events median 0.95LCL 0.95UCL
## class=P 30 2 NA 3667 NA
## class=Pm 30 22 1107 682 2011
# Full life table per class: one row per event time with the number at risk,
# the number of events, the survival estimate and its confidence bounds.
summary(fit)
## Call: survfit(formula = Surv(time, Status) ~ class, data = data)
##
## class=P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1461 13 1 0.923 0.0739 0.789 1
## 3667 5 1 0.738 0.1754 0.464 1
##
## class=Pm
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 30 1 0.967 0.0328 0.905 1.000
## 216 29 1 0.933 0.0455 0.848 1.000
## 386 28 1 0.900 0.0548 0.799 1.000
## 400 27 1 0.867 0.0621 0.753 0.997
## 420 26 1 0.833 0.0680 0.710 0.978
## 500 25 1 0.800 0.0730 0.669 0.957
## 518 24 1 0.767 0.0772 0.629 0.934
## 520 23 1 0.733 0.0807 0.591 0.910
## 573 22 1 0.700 0.0837 0.554 0.885
## 632 21 1 0.667 0.0861 0.518 0.859
## 682 20 1 0.633 0.0880 0.482 0.832
## 797 19 1 0.600 0.0894 0.448 0.804
## 807 18 1 0.567 0.0905 0.414 0.775
## 857 17 1 0.533 0.0911 0.382 0.745
## 1107 15 1 0.498 0.0917 0.347 0.714
## 1157 14 1 0.462 0.0918 0.313 0.682
## 1357 13 1 0.427 0.0913 0.280 0.649
## 1428 12 1 0.391 0.0904 0.249 0.615
## 1635 11 1 0.356 0.0889 0.218 0.580
## 1682 10 1 0.320 0.0868 0.188 0.545
## 1835 9 1 0.284 0.0841 0.159 0.508
## 2011 7 1 0.244 0.0813 0.127 0.469
# Access the short summary table (one row per class).
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## class=P 30 30 30 2 3887.215 212.9214 NA 3667
## class=Pm 30 30 30 22 1716.567 276.5550 1107 682
## 0.95UCL
## class=P NA
## class=Pm 2011
# Plot the curves; pval = TRUE requests a p-value annotation on the plot.
ggsurvplot(fit, data = data, pval = TRUE)
# Log-rank test for a difference in survival between the classes.
# NOTE(review): the result is stored in `surv.stage`, a name left over from the
# stage comparison, although the grouping variable here is `class`.
surv.stage <- survdiff(Surv(time,Status) ~ class, data = data)
surv.stage
## Call:
## survdiff(formula = Surv(time, Status) ~ class, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## class=P 30 2 13 9.32 20.5
## class=Pm 30 22 11 11.05 20.5
##
## Chisq= 20.5 on 1 degrees of freedom, p= 6e-06
# Exclude rectal tumours so that only left- and right-sided tumours are
# compared. A logical mask is used instead of `data[-which(...), ]`, which
# returns zero rows whenever no row matches. Rows with a missing sidedness are
# kept here, exactly as before; survfit() drops them itself.
newdata <- data[!(data$sidedness %in% "rectum"), ]
# Kaplan-Meier survival curves, one per tumour side.
fit <- survfit(Surv(time, Status) ~ sidedness, data = newdata)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## 4 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## sidedness=left 26 8 NA 1835 NA
## sidedness=right 23 9 1635 1461 NA
# Full life table per side: one row per event time with the number at risk,
# the number of events, the survival estimate and its confidence bounds.
summary(fit)
## Call: survfit(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## 4 observations deleted due to missingness
## sidedness=left
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 400 25 1 0.960 0.0392 0.886 1.000
## 520 24 1 0.920 0.0543 0.820 1.000
## 632 23 1 0.880 0.0650 0.761 1.000
## 682 22 1 0.840 0.0733 0.708 0.997
## 807 20 1 0.798 0.0808 0.654 0.973
## 1107 15 1 0.745 0.0913 0.586 0.947
## 1428 13 1 0.688 0.1006 0.516 0.916
## 1835 10 1 0.619 0.1116 0.434 0.881
##
## sidedness=right
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 23 1 0.957 0.0425 0.877 1.000
## 420 22 1 0.913 0.0588 0.805 1.000
## 500 21 1 0.870 0.0702 0.742 1.000
## 797 16 1 0.815 0.0843 0.666 0.998
## 857 15 1 0.761 0.0946 0.596 0.971
## 1157 12 1 0.697 0.1058 0.518 0.939
## 1461 7 1 0.598 0.1294 0.391 0.914
## 1635 6 1 0.498 0.1411 0.286 0.868
## 2011 5 1 0.399 0.1438 0.197 0.808
# Access the short summary table (one row per side).
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## sidedness=left 26 26 26 8 2985.803 345.9387 NA 1835
## sidedness=right 23 23 23 9 2391.543 414.8524 1635 1461
## 0.95UCL
## sidedness=left NA
## sidedness=right NA
# Plot the curves; pval = TRUE requests a p-value annotation on the plot.
ggsurvplot(fit, data = newdata, pval = TRUE)
# Log-rank test for a difference in survival between left and right.
# NOTE(review): the result is stored in `surv.stage`, a name left over from the
# stage comparison, although the grouping variable here is `sidedness`.
surv.stage <- survdiff(Surv(time,Status) ~ sidedness, data = newdata)
surv.stage
## Call:
## survdiff(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## n=49, 4 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sidedness=left 26 8 9.7 0.299 0.702
## sidedness=right 23 9 7.3 0.398 0.702
##
## Chisq= 0.7 on 1 degrees of freedom, p= 0.4
# Expression matrix (samples in rows) and clinical table, both sorted by
# sample ID and restricted to the samples they have in common.
xdata <- rnaseq1[order(row.names(rnaseq1)), ]
rownames(DATASET1_bal) <- DATASET1_bal$ID
ydata <- as.data.frame(DATASET1_bal[order(row.names(DATASET1_bal)), ])
rownames(ydata) <- ydata$ID
xdata <- xdata[rownames(xdata) %in% rownames(ydata), ]
ydata <- as.data.frame(ydata[rownames(ydata) %in% rownames(xdata), ])
# The class labels taken from `ydata` are paired with the columns of the count
# matrix purely by position, so both tables must list the same samples in the
# same order.
stopifnot(identical(rownames(xdata), rownames(ydata)))
# keep features with standard deviation > 0
# (vapply + seq_len: fixed return type, and safe when there are no columns)
gene_sd <- vapply(
  seq_len(ncol(xdata)),
  function(ix) sd(xdata[, ix]),
  numeric(1)
)
xdata <- xdata[, gene_sd != 0]
# The count matrix passed on has genes in rows and samples in columns.
xdata <- t(xdata)
group <- as.factor(ydata$class)
# NOTE(review): this global is named `class`, like the base function; calls to
# class() still resolve to the function, but the name is easy to misread.
class <- as.data.frame(ydata$class)
# Bundle the count matrix and the class labels into a DGEList.
edgeR.DGElist <- DGEList(counts = xdata, group = group)
# Expression filter: retain genes reaching at least one count per million in
# five or more samples.
keep <- rowSums(cpm(edgeR.DGElist) >= 1) >= 5
edgeR.DGElist <- edgeR.DGElist[keep, ]
# Design matrix: intercept plus one indicator for the class.
design <- model.matrix(~ group)
# Dispersion estimates over all genes and samples.
edgeR.DGElist <- estimateDisp(edgeR.DGElist, design = design)
# Gene-wise negative binomial GLM fit.
edger_fit <- glmFit(edgeR.DGElist, design = design)
# Likelihood-ratio test per gene on the fitted model.
edger_lrt <- glmLRT(edger_fit)
# Number of genes called down, not significant and up.
edger_lrt %>%
  decideTests() %>%
  summary()
## groupPm
## Down 7944
## NotSig 10916
## Up 1589
# Full result table of the likelihood-ratio test: every gene (n = Inf), ordered
# by p-value, with Benjamini-Hochberg adjusted p-values.
DGE.results_edgeR <- topTags(
  edger_lrt,
  n = Inf,
  sort.by = "PValue",
  adjust.method = "BH"
)
# Table with the top 10 differentially expressed genes.
topTags(DGE.results_edgeR)
## Coefficient: groupPm
## logFC logCPM LR PValue FDR
## LRP4 -2.569312 4.9195654 38.64129 5.093015e-10 1.041471e-05
## AXIN2 -1.932022 6.2429524 35.94787 2.026676e-09 1.847888e-05
## MIR3197 4.046824 -2.1741728 35.38116 2.710970e-09 1.847888e-05
## TFF2 3.209515 2.0356193 34.71053 3.825576e-09 1.955730e-05
## RNU7-77P 3.584652 -2.1579934 33.58504 6.821530e-09 2.339502e-05
## GBP4 -2.447218 4.2258158 33.23732 8.157008e-09 2.339502e-05
## RNU6-83P 3.279494 -0.6616794 33.10829 8.716611e-09 2.339502e-05
## IDO1 -3.193236 2.8868167 33.01341 9.152535e-09 2.339502e-05
## RNU6-769P 3.525962 -1.7899718 31.68738 1.810956e-08 3.760618e-05
## LGR6 -1.911118 2.0345325 31.65751 1.839023e-08 3.760618e-05
# Differentially expressed genes: FDR below 5 %. The table keeps the p-value
# ordering produced by topTags().
genes_deg <- DGE.results_edgeR$table
genes_deg <- genes_deg[which(genes_deg$FDR < 0.05), ]
dim(genes_deg) # genes found to be differentially expressed
## [1] 9533 5
# write_xlsx() does not export row names, so the gene symbols are copied into
# a regular column first.
genes_deg$row <- row.names(genes_deg)
write_xlsx(genes_deg, "genes_deg_d1.xlsx")
# head() returns at most 100 rows; `[1:100, ]` pads the result with NA rows
# (and NA gene names) whenever fewer than 100 genes pass the filter.
top100_deg <- rownames(head(genes_deg, 100))
# Up-regulated genes: positive log fold change for the tested coefficient
# (reported as "groupPm" in the results above).
high <- genes_deg[genes_deg$logFC > 0, ]
dim(high)
## [1] 1589 6
top100_high <- rownames(head(high, 100))
# Down-regulated genes: negative log fold change for the same coefficient.
low <- genes_deg[genes_deg$logFC < 0, ]
dim(low)
## [1] 7944 6
top100_low <- rownames(head(low, 100))
# Build the feature table (xdata) and the 0/1 label table (ydata) for the
# classifiers.
# NOTE(review): `xdataT` is not assigned anywhere in the visible code; it
# presumably comes from the .RData file loaded earlier - confirm its contents
# (samples in rows, genes in columns is assumed by the code below).
xdata.raw <- xdataT
# keep features with standard deviation > 0
xdata <- xdata.raw[,sapply(seq(ncol(xdata.raw)), function(ix) {sd(xdata.raw[,ix])}) != 0]
# One-column data frame with the class labels; as.data.frame() names the
# column after the expression, hence the back-quoted `clinic1$class` below.
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
# ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the labels in place: "P" becomes 0 and "Pm" becomes 1. The order
# matters only in that both comparisons are made against the original strings.
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep only the samples present in both tables ...
xdata <- xdata[rownames(xdata) %in%
rownames(ydata.raw),]
ydata.raw <- as.data.frame(ydata.raw[rownames(ydata.raw) %in%
rownames(xdata),])
# ... and sort both by sample ID so that rows line up by position.
xdata <- xdata[ order(row.names(xdata)), ]
ydata.raw <- ydata.raw[ order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[,1:2])
# Convert the recoded labels to numeric values.
ydata$`clinic1$class` <- as.numeric(ydata$`clinic1$class`)
Five classifiers were used: decision trees, linear and radial support vector machines, logistic regression, and random forest.
# Feature table for the classifiers: the 50 most significant differentially
# expressed genes, taken from `xdataT`.
xdata <- xdataT[, top100_deg[seq_len(50)]]
# Remember the gene symbols, then swap them for the generic names Var1..Var50.
nomesgenes <- colnames(xdata)
colnames(xdata) <- paste0("Var", seq_len(ncol(xdata)))
names(ydata) <- c("class", "row")
# Append the class label as a factor column named `type`.
xdata$type <- as.factor(ydata$class)
#xdata <- xdata[colMeans(xdata == 0) <= 0.6] #delete genes that have null values in at least 60% of the samples
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# acc: histogram, mean, median and sd of each row of acc_train, taken across
# all of its columns. Rows 1-5 are stored as trees, svm, svmR, logs and rf
# (presumably decision tree, linear SVM, radial SVM, logistic regression and
# random forest; columns presumably index the repeated runs - TODO confirm).
acc_trees <- acc_train[1,]
hist(acc_trees)
mean(acc_trees)
## [1] 0.9940476
median(acc_trees)
## [1] 1
sd(acc_trees)
## [1] 0.01090039
acc_svm <- acc_train[2,]
hist(acc_svm)
mean(acc_svm)
## [1] 0.9495238
median(acc_svm)
## [1] 0.952381
sd(acc_svm)
## [1] 0.03106779
acc_svmR <- acc_train[3,]
hist(acc_svmR)
mean(acc_svmR)
## [1] 0.8845238
median(acc_svmR)
## [1] 0.9047619
sd(acc_svmR)
## [1] 0.09880148
acc_logs <- acc_train[4,]
hist(acc_logs)
mean(acc_logs)
## [1] 1
median(acc_logs)
## [1] 1
sd(acc_logs)
## [1] 0
acc_rf <- acc_train[5,]
hist(acc_rf)
mean(acc_rf)
## [1] 1
median(acc_rf)
## [1] 1
sd(acc_rf)
## [1] 0
# auc: mean, median and sd of each row of auc_train across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf).
# NOTE(review): every value recorded below equals the matching acc_train
# summary; confirm upstream that auc_train is not just a copy of the accuracy.
auc_trees <- auc_train[1,]
mean(auc_trees)
## [1] 0.9940476
median(auc_trees)
## [1] 1
sd(auc_trees)
## [1] 0.01090039
auc_svm <- auc_train[2,]
mean(auc_svm)
## [1] 0.9495238
median(auc_svm)
## [1] 0.952381
sd(auc_svm)
## [1] 0.03106779
auc_svmR <- auc_train[3,]
mean(auc_svmR)
## [1] 0.8845238
median(auc_svmR)
## [1] 0.9047619
sd(auc_svmR)
## [1] 0.09880148
auc_logs <- auc_train[4,]
mean(auc_logs)
## [1] 1
median(auc_logs)
## [1] 1
sd(auc_logs)
## [1] 0
auc_rf <- auc_train[5,]
mean(auc_rf)
## [1] 1
median(auc_rf)
## [1] 1
sd(auc_rf)
## [1] 0
# miscl: mean, median and sd of each row of miscl_train across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf). The recorded values look
# like per-run counts of misclassified samples - TODO confirm.
miscl_trees <- miscl_train[1,]
mean(miscl_trees)
## [1] 0.25
median(miscl_trees)
## [1] 0
sd(miscl_trees)
## [1] 0.4578165
miscl_svm <- miscl_train[2,]
mean(miscl_svm)
## [1] 2.12
median(miscl_svm)
## [1] 2
sd(miscl_svm)
## [1] 1.304847
miscl_svmR <- miscl_train[3,]
mean(miscl_svmR)
## [1] 4.85
median(miscl_svmR)
## [1] 4
sd(miscl_svmR)
## [1] 4.149662
miscl_logs <- miscl_train[4,]
mean(miscl_logs)
## [1] 0
median(miscl_logs)
## [1] 0
sd(miscl_logs)
## [1] 0
miscl_rf <- miscl_train[5,]
mean(miscl_rf)
## [1] 0
median(miscl_rf)
## [1] 0
sd(miscl_rf)
## [1] 0
# sensitivity: mean, median and sd of each row of sensitivity_train across all
# columns (rows 1-5 stored as trees, svm, svmR, logs, rf).
sensitivity_trees <- sensitivity_train[1,]
mean(sensitivity_trees)
## [1] 0.9952381
median(sensitivity_trees)
## [1] 1
sd(sensitivity_trees)
## [1] 0.01435768
sensitivity_svm <- sensitivity_train[2,]
mean(sensitivity_svm)
## [1] 0.9314286
median(sensitivity_svm)
## [1] 0.9761905
sd(sensitivity_svm)
## [1] 0.0790238
sensitivity_svmR <- sensitivity_train[3,]
mean(sensitivity_svmR)
## [1] 0.8514286
median(sensitivity_svmR)
## [1] 0.8095238
sd(sensitivity_svmR)
## [1] 0.1264926
sensitivity_logs <- sensitivity_train[4,]
mean(sensitivity_logs)
## [1] 1
median(sensitivity_logs)
## [1] 1
sd(sensitivity_logs)
## [1] 0
sensitivity_rf <- sensitivity_train[5,]
mean(sensitivity_rf)
## [1] 1
median(sensitivity_rf)
## [1] 1
sd(sensitivity_rf)
## [1] 0
# specificity: mean, median and sd of each row of specificity_train across all
# columns (rows 1-5 stored as trees, svm, svmR, logs, rf).
specificity_trees <- specificity_train[1,]
mean(specificity_trees)
## [1] 0.9928571
median(specificity_trees)
## [1] 1
sd(specificity_trees)
## [1] 0.01708906
specificity_svm <- specificity_train[2,]
mean(specificity_svm)
## [1] 0.967619
median(specificity_svm)
## [1] 1
sd(specificity_svm)
## [1] 0.04380074
specificity_svmR <- specificity_train[3,]
mean(specificity_svmR)
## [1] 0.917619
median(specificity_svmR)
## [1] 1
sd(specificity_svmR)
## [1] 0.1126171
specificity_logs <- specificity_train[4,]
mean(specificity_logs)
## [1] 1
median(specificity_logs)
## [1] 1
sd(specificity_logs)
## [1] 0
specificity_rf <- specificity_train[5,]
mean(specificity_rf)
## [1] 1
median(specificity_rf)
## [1] 1
sd(specificity_rf)
## [1] 0
# fneg: mean, median and sd of each row of fneg_train across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf). The recorded values look
# like per-run counts of false negatives - TODO confirm.
fneg_trees <- fneg_train[1,]
mean(fneg_trees)
## [1] 0.1
median(fneg_trees)
## [1] 0
sd(fneg_trees)
## [1] 0.3015113
fneg_svm <- fneg_train[2,]
mean(fneg_svm)
## [1] 1.44
median(fneg_svm)
## [1] 0.5
sd(fneg_svm)
## [1] 1.6595
fneg_svmR <- fneg_train[3,]
mean(fneg_svmR)
## [1] 3.12
median(fneg_svmR)
## [1] 4
sd(fneg_svmR)
## [1] 2.656344
fneg_logs <- fneg_train[4,]
mean(fneg_logs)
## [1] 0
median(fneg_logs)
## [1] 0
sd(fneg_logs)
## [1] 0
fneg_rf <- fneg_train[5,]
mean(fneg_rf)
## [1] 0
median(fneg_rf)
## [1] 0
sd(fneg_rf)
## [1] 0
# acc: histogram, mean, median and sd of each row of `acc` across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf). `acc` is presumably the
# test-set counterpart of acc_train - TODO confirm.
# The acc_* names assigned here replace any values previously bound to them.
acc_trees <- acc[1,]
hist(acc_trees)
mean(acc_trees)
## [1] 0.6333333
median(acc_trees)
## [1] 0.6111111
sd(acc_trees)
## [1] 0.09799079
acc_svm <- acc[2,]
hist(acc_svm)
mean(acc_svm)
## [1] 0.6861111
median(acc_svm)
## [1] 0.6666667
sd(acc_svm)
## [1] 0.1024622
acc_svmR <- acc[3,]
hist(acc_svmR)
mean(acc_svmR)
## [1] 0.6416667
median(acc_svmR)
## [1] 0.6666667
sd(acc_svmR)
## [1] 0.1009294
acc_logs <- acc[4,]
hist(acc_logs)
mean(acc_logs)
## [1] 0.6883333
median(acc_logs)
## [1] 0.6666667
sd(acc_logs)
## [1] 0.09238866
acc_rf <- acc[5,]
hist(acc_rf)
mean(acc_rf)
## [1] 0.7444444
median(acc_rf)
## [1] 0.7222222
sd(acc_rf)
## [1] 0.08898704
# auc: mean, median and sd of each row of `auc` across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf).
# NOTE(review): the values recorded for svm, logs and rf equal the matching
# `acc` summaries; confirm how auc is computed upstream.
# The auc_* names assigned here replace any values previously bound to them.
auc_trees <- auc[1,]
mean(auc_trees)
## [1] 0.6377778
median(auc_trees)
## [1] 0.6111111
sd(auc_trees)
## [1] 0.09157011
auc_svm <- auc[2,]
mean(auc_svm)
## [1] 0.6861111
median(auc_svm)
## [1] 0.6666667
sd(auc_svm)
## [1] 0.1024622
auc_svmR <- auc[3,]
mean(auc_svmR)
## [1] 0.6461111
median(auc_svmR)
## [1] 0.6666667
sd(auc_svmR)
## [1] 0.09431231
auc_logs <- auc[4,]
mean(auc_logs)
## [1] 0.6883333
median(auc_logs)
## [1] 0.6666667
sd(auc_logs)
## [1] 0.09238866
auc_rf <- auc[5,]
mean(auc_rf)
## [1] 0.7444444
median(auc_rf)
## [1] 0.7222222
sd(auc_rf)
## [1] 0.08898704
# miscl: mean, median and sd of each row of `miscl` across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf). The recorded values look
# like per-run counts of misclassified samples - TODO confirm.
# The miscl_* names assigned here replace any values previously bound to them.
miscl_trees <- miscl[1,]
mean(miscl_trees)
## [1] 6.6
median(miscl_trees)
## [1] 7
sd(miscl_trees)
## [1] 1.763834
miscl_svm <- miscl[2,]
mean(miscl_svm)
## [1] 5.65
median(miscl_svm)
## [1] 6
sd(miscl_svm)
## [1] 1.84432
miscl_svmR <- miscl[3,]
mean(miscl_svmR)
## [1] 6.45
median(miscl_svmR)
## [1] 6
sd(miscl_svmR)
## [1] 1.816729
miscl_logs <- miscl[4,]
mean(miscl_logs)
## [1] 5.61
median(miscl_logs)
## [1] 6
sd(miscl_logs)
## [1] 1.662996
miscl_rf <- miscl[5,]
mean(miscl_rf)
## [1] 4.6
median(miscl_rf)
## [1] 5
sd(miscl_rf)
## [1] 1.601767
# sensitivity: mean, median and sd of each row of `sensitivity` across all
# columns (rows 1-5 stored as trees, svm, svmR, logs, rf).
# The sensitivity_* names assigned here replace any values previously bound
# to them.
sensitivity_trees <- sensitivity[1,]
mean(sensitivity_trees)
## [1] 0.6744444
median(sensitivity_trees)
## [1] 0.6666667
sd(sensitivity_trees)
## [1] 0.1627907
sensitivity_svm <- sensitivity[2,]
mean(sensitivity_svm)
## [1] 0.6488889
median(sensitivity_svm)
## [1] 0.6666667
sd(sensitivity_svm)
## [1] 0.1756608
sensitivity_svmR <- sensitivity[3,]
mean(sensitivity_svmR)
## [1] 0.7088889
median(sensitivity_svmR)
## [1] 0.6666667
sd(sensitivity_svmR)
## [1] 0.1846997
sensitivity_logs <- sensitivity[4,]
mean(sensitivity_logs)
## [1] 0.6866667
median(sensitivity_logs)
## [1] 0.6666667
sd(sensitivity_logs)
## [1] 0.1424668
sensitivity_rf <- sensitivity[5,]
mean(sensitivity_rf)
## [1] 0.7255556
median(sensitivity_rf)
## [1] 0.7222222
sd(sensitivity_rf)
## [1] 0.1258021
# specificity: mean, median and sd of each row of `specificity` across all
# columns (rows 1-5 stored as trees, svm, svmR, logs, rf).
# The specificity_* names assigned here replace any values previously bound
# to them.
specificity_trees <- specificity[1,]
mean(specificity_trees)
## [1] 0.5922222
median(specificity_trees)
## [1] 0.5555556
sd(specificity_trees)
## [1] 0.1671747
specificity_svm <- specificity[2,]
mean(specificity_svm)
## [1] 0.7233333
median(specificity_svm)
## [1] 0.7777778
sd(specificity_svm)
## [1] 0.2092123
specificity_svmR <- specificity[3,]
mean(specificity_svmR)
## [1] 0.5744444
median(specificity_svmR)
## [1] 0.5555556
sd(specificity_svmR)
## [1] 0.198545
specificity_logs <- specificity[4,]
mean(specificity_logs)
## [1] 0.69
median(specificity_logs)
## [1] 0.6666667
sd(specificity_logs)
## [1] 0.1628213
specificity_rf <- specificity[5,]
mean(specificity_rf)
## [1] 0.7633333
median(specificity_rf)
## [1] 0.7777778
sd(specificity_rf)
## [1] 0.1400613
# fneg: mean, median and sd of each row of `fneg` across all columns
# (rows 1-5 stored as trees, svm, svmR, logs, rf). The recorded values look
# like per-run counts of false negatives - TODO confirm.
# The fneg_* names assigned here replace any values previously bound to them.
fneg_trees <- fneg[1,]
mean(fneg_trees)
## [1] 2.93
median(fneg_trees)
## [1] 3
sd(fneg_trees)
## [1] 1.465116
fneg_svm <- fneg[2,]
mean(fneg_svm)
## [1] 3.16
median(fneg_svm)
## [1] 3
sd(fneg_svm)
## [1] 1.580947
fneg_svmR <- fneg[3,]
mean(fneg_svmR)
## [1] 2.62
median(fneg_svmR)
## [1] 3
sd(fneg_svmR)
## [1] 1.662297
fneg_logs <- fneg[4,]
mean(fneg_logs)
## [1] 2.82
median(fneg_logs)
## [1] 3
sd(fneg_logs)
## [1] 1.282201
fneg_rf <- fneg[5,]
mean(fneg_rf)
## [1] 2.47
median(fneg_rf)
## [1] 2.5
sd(fneg_rf)
## [1] 1.132219
# Rebuild the full feature table and the label table over their shared
# samples, both sorted by row name.
xdata <- xdataT
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw[["row"]] <- DATASET1$ID
#
# # ydata.raw$class <- c(rep(0,28),rep(1,34))
#
# Recode the class column in place: "P" becomes 0, "Pm" becomes 1.
ydata.raw[["clinic1$class"]][ydata.raw[["clinic1$class"]] == "P"] <- 0
ydata.raw[["clinic1$class"]][ydata.raw[["clinic1$class"]] == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep only samples present in both tables.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
# Same ordering on both sides so rows correspond one to one.
xdata <- xdata[order(rownames(xdata)), ]
ydata.raw <- ydata.raw[order(rownames(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw)
ydata[["clinic1$class"]] <- as.numeric(ydata[["clinic1$class"]])
## [1] 0
## [1] 1
## [1] 1011
## [1] "data"
## [1] "EN"
## [1] 1
## [1] 2
## [1] 1022
## [1] "data"
## [1] "EN"
## [1] 2
## [1] 3
## [1] 1033
## [1] "data"
## [1] "EN"
## [1] 3
## [1] 4
## [1] 1044
## [1] "data"
## [1] "EN"
## [1] 4
## [1] 5
## [1] 1055
## [1] "data"
## [1] "EN"
## [1] 5
## [1] 6
## [1] 1066
## [1] "data"
## [1] "EN"
## [1] 6
## [1] 7
## [1] 1077
## [1] "data"
## [1] "EN"
## [1] 7
## [1] 8
## [1] 1088
## [1] "data"
## [1] "EN"
## [1] 8
## [1] 9
## [1] 1099
## [1] "data"
## [1] "EN"
## [1] 9
## [1] 10
## [1] 1110
## [1] "data"
## [1] "EN"
## [1] 10
## [1] 11
## [1] 1121
## [1] "data"
## [1] "EN"
## [1] 11
## [1] 12
## [1] 1132
## [1] "data"
## [1] "EN"
## [1] 12
## [1] 13
## [1] 1143
## [1] "data"
## [1] "EN"
## [1] 13
## [1] 14
## [1] 1154
## [1] "data"
## [1] "EN"
## [1] 14
## [1] 15
## [1] 1165
## [1] "data"
## [1] "EN"
## [1] 15
## [1] 16
## [1] 1176
## [1] "data"
## [1] "EN"
## [1] 16
## [1] 17
## [1] 1187
## [1] "data"
## [1] "EN"
## [1] 17
## [1] 18
## [1] 1198
## [1] "data"
## [1] "EN"
## [1] 18
## [1] 19
## [1] 1209
## [1] "data"
## [1] "EN"
## [1] 19
## [1] 20
## [1] 1220
## [1] "data"
## [1] "EN"
## [1] 20
## [1] 21
## [1] 1231
## [1] "data"
## [1] "EN"
## [1] 21
## [1] 22
## [1] 1242
## [1] "data"
## [1] "EN"
## [1] 22
## [1] 23
## [1] 1253
## [1] "data"
## [1] "EN"
## [1] 23
## [1] 24
## [1] 1264
## [1] "data"
## [1] "EN"
## [1] 24
## [1] 25
## [1] 1275
## [1] "data"
## [1] "EN"
## [1] 25
## [1] 26
## [1] 1286
## [1] "data"
## [1] "EN"
## [1] 26
## [1] 27
## [1] 1297
## [1] "data"
## [1] "EN"
## [1] 27
## [1] 28
## [1] 1308
## [1] "data"
## [1] "EN"
## [1] 28
## [1] 29
## [1] 1319
## [1] "data"
## [1] "EN"
## [1] 29
## [1] 30
## [1] 1330
## [1] "data"
## [1] "EN"
## [1] 30
## [1] 31
## [1] 1341
## [1] "data"
## [1] "EN"
## [1] 31
## [1] 32
## [1] 1352
## [1] "data"
## [1] "EN"
## [1] 32
## [1] 33
## [1] 1363
## [1] "data"
## [1] "EN"
## [1] 33
## [1] 34
## [1] 1374
## [1] "data"
## [1] "EN"
## [1] 34
## [1] 35
## [1] 1385
## [1] "data"
## [1] "EN"
## [1] 35
## [1] 36
## [1] 1396
## [1] "data"
## [1] "EN"
## [1] 36
## [1] 37
## [1] 1407
## [1] "data"
## [1] "EN"
## [1] 37
## [1] 38
## [1] 1418
## [1] "data"
## [1] "EN"
## [1] 38
## [1] 39
## [1] 1429
## [1] "data"
## [1] "EN"
## [1] 39
## [1] 40
## [1] 1440
## [1] "data"
## [1] "EN"
## [1] 40
## [1] 41
## [1] 1451
## [1] "data"
## [1] "EN"
## [1] 41
## [1] 42
## [1] 1462
## [1] "data"
## [1] "EN"
## [1] 42
## [1] 43
## [1] 1473
## [1] "data"
## [1] "EN"
## [1] 43
## [1] 44
## [1] 1484
## [1] "data"
## [1] "EN"
## [1] 44
## [1] 45
## [1] 1495
## [1] "data"
## [1] "EN"
## [1] 45
## [1] 46
## [1] 1506
## [1] "data"
## [1] "EN"
## [1] 46
## [1] 47
## [1] 1517
## [1] "data"
## [1] "EN"
## [1] 47
## [1] 48
## [1] 1528
## [1] "data"
## [1] "EN"
## [1] 48
## [1] 49
## [1] 1539
## [1] "data"
## [1] "EN"
## [1] 49
## [1] 50
## [1] 1550
## [1] "data"
## [1] "EN"
## [1] 50
## [1] 51
## [1] 1561
## [1] "data"
## [1] "EN"
## [1] 51
## [1] 52
## [1] 1572
## [1] "data"
## [1] "EN"
## [1] 52
## [1] 53
## [1] 1583
## [1] "data"
## [1] "EN"
## [1] 53
## [1] 54
## [1] 1594
## [1] "data"
## [1] "EN"
## [1] 54
## [1] 55
## [1] 1605
## [1] "data"
## [1] "EN"
## [1] 55
## [1] 56
## [1] 1616
## [1] "data"
## [1] "EN"
## [1] 56
## [1] 57
## [1] 1627
## [1] "data"
## [1] "EN"
## [1] 57
## [1] 58
## [1] 1638
## [1] "data"
## [1] "EN"
## [1] 58
## [1] 59
## [1] 1649
## [1] "data"
## [1] "EN"
## [1] 59
## [1] 60
## [1] 1660
## [1] "data"
## [1] "EN"
## [1] 60
## [1] 61
## [1] 1671
## [1] "data"
## [1] "EN"
## [1] 61
## [1] 62
## [1] 1682
## [1] "data"
## [1] "EN"
## [1] 62
## [1] 63
## [1] 1693
## [1] "data"
## [1] "EN"
## [1] 63
## [1] 64
## [1] 1704
## [1] "data"
## [1] "EN"
## [1] 64
## [1] 65
## [1] 1715
## [1] "data"
## [1] "EN"
## [1] 65
## [1] 66
## [1] 1726
## [1] "data"
## [1] "EN"
## [1] 66
## [1] 67
## [1] 1737
## [1] "data"
## [1] "EN"
## [1] 67
## [1] 68
## [1] 1748
## [1] "data"
## [1] "EN"
## [1] 68
## [1] 69
## [1] 1759
## [1] "data"
## [1] "EN"
## [1] 69
## [1] 70
## [1] 1770
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 69
## [1] 71
## [1] 1781
## [1] "data"
## [1] "EN"
## [1] 70
## [1] 72
## [1] 1792
## [1] "data"
## [1] "EN"
## [1] 71
## [1] 73
## [1] 1803
## [1] "data"
## [1] "EN"
## [1] 72
## [1] 74
## [1] 1814
## [1] "data"
## [1] "EN"
## [1] 73
## [1] 75
## [1] 1825
## [1] "data"
## [1] "EN"
## [1] 74
## [1] 76
## [1] 1836
## [1] "data"
## [1] "EN"
## [1] 75
## [1] 77
## [1] 1847
## [1] "data"
## [1] "EN"
## [1] 76
## [1] 78
## [1] 1858
## [1] "data"
## [1] "EN"
## [1] 77
## [1] 79
## [1] 1869
## [1] "data"
## [1] "EN"
## [1] 78
## [1] 80
## [1] 1880
## [1] "data"
## [1] "EN"
## [1] 79
## [1] 81
## [1] 1891
## [1] "data"
## [1] "EN"
## [1] 80
## [1] 82
## [1] 1902
## [1] "data"
## [1] "EN"
## [1] 81
## [1] 83
## [1] 1913
## [1] "data"
## [1] "EN"
## [1] 82
## [1] 84
## [1] 1924
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 82
## [1] 85
## [1] 1935
## [1] "data"
## [1] "EN"
## [1] 83
## [1] 86
## [1] 1946
## [1] "data"
## [1] "EN"
## [1] 84
## [1] 87
## [1] 1957
## [1] "data"
## [1] "EN"
## [1] 85
## [1] 88
## [1] 1968
## [1] "data"
## [1] "EN"
## [1] 86
## [1] 89
## [1] 1979
## [1] "data"
## [1] "EN"
## [1] 87
## [1] 90
## [1] 1990
## [1] "data"
## [1] "EN"
## [1] 88
## [1] 91
## [1] 2001
## [1] "data"
## [1] "EN"
## [1] 89
## [1] 92
## [1] 2012
## [1] "data"
## [1] "EN"
## [1] 90
## [1] 93
## [1] 2023
## [1] "data"
## [1] "EN"
## [1] 91
## [1] 94
## [1] 2034
## [1] "data"
## [1] "EN"
## [1] 92
## [1] 95
## [1] 2045
## [1] "data"
## [1] "EN"
## [1] 93
## [1] 96
## [1] 2056
## [1] "data"
## [1] "EN"
## [1] 94
## [1] 97
## [1] 2067
## [1] "data"
## [1] "EN"
## [1] 95
## [1] 98
## [1] 2078
## [1] "data"
## [1] "EN"
## [1] 96
## [1] 99
## [1] 2089
## [1] "data"
## [1] "EN"
## [1] 97
## [1] 100
## [1] 2100
## [1] "data"
## [1] "EN"
## [1] 98
## [1] 101
## [1] 2111
## [1] "data"
## [1] "EN"
## [1] 99
## [1] 102
## [1] 2122
## [1] "data"
## [1] "EN"
## [1] 1
## [1] 0
## [1] 2133
## [1] "data"
## [1] "iTwiner"
## [1] 2
## [1] 1
## [1] 2144
## [1] "data"
## [1] "iTwiner"
## [1] 3
## [1] 2
## [1] 2155
## [1] "data"
## [1] "iTwiner"
## [1] 4
## [1] 3
## [1] 2166
## [1] "data"
## [1] "iTwiner"
## [1] 5
## [1] 4
## [1] 2177
## [1] "data"
## [1] "iTwiner"
## [1] 6
## [1] 5
## [1] 2188
## [1] "data"
## [1] "iTwiner"
## [1] 7
## [1] 6
## [1] 2199
## [1] "data"
## [1] "iTwiner"
## [1] 8
## [1] 7
## [1] 2210
## [1] "data"
## [1] "iTwiner"
## [1] 9
## [1] 8
## [1] 2221
## [1] "data"
## [1] "iTwiner"
## [1] 10
## [1] 9
## [1] 2232
## [1] "data"
## [1] "iTwiner"
## [1] 11
## [1] 10
## [1] 2243
## [1] "data"
## [1] "iTwiner"
## [1] 12
## [1] 11
## [1] 2254
## [1] "data"
## [1] "iTwiner"
## [1] 13
## [1] 12
## [1] 2265
## [1] "data"
## [1] "iTwiner"
## [1] 14
## [1] 13
## [1] 2276
## [1] "data"
## [1] "iTwiner"
## [1] 15
## [1] 14
## [1] 2287
## [1] "data"
## [1] "iTwiner"
## [1] 16
## [1] 15
## [1] 2298
## [1] "data"
## [1] "iTwiner"
## [1] 17
## [1] 16
## [1] 2309
## [1] "data"
## [1] "iTwiner"
## [1] 18
## [1] 17
## [1] 2320
## [1] "data"
## [1] "iTwiner"
## [1] 19
## [1] 18
## [1] 2331
## [1] "data"
## [1] "iTwiner"
## [1] 20
## [1] 19
## [1] 2342
## [1] "data"
## [1] "iTwiner"
## [1] 21
## [1] 20
## [1] 2353
## [1] "data"
## [1] "iTwiner"
## [1] 22
## [1] 21
## [1] 2364
## [1] "data"
## [1] "iTwiner"
## [1] 23
## [1] 22
## [1] 2375
## [1] "data"
## [1] "iTwiner"
## [1] 24
## [1] 23
## [1] 2386
## [1] "data"
## [1] "iTwiner"
## [1] 25
## [1] 24
## [1] 2397
## [1] "data"
## [1] "iTwiner"
## [1] 26
## [1] 25
## [1] 2408
## [1] "data"
## [1] "iTwiner"
## [1] 27
## [1] 26
## [1] 2419
## [1] "data"
## [1] "iTwiner"
## [1] 28
## [1] 27
## [1] 2430
## [1] "data"
## [1] "iTwiner"
## [1] 29
## [1] 28
## [1] 2441
## [1] "data"
## [1] "iTwiner"
## [1] 30
## [1] 29
## [1] 2452
## [1] "data"
## [1] "iTwiner"
## [1] 31
## [1] 30
## [1] 2463
## [1] "data"
## [1] "iTwiner"
## [1] 32
## [1] 31
## [1] 2474
## [1] "data"
## [1] "iTwiner"
## [1] 33
## [1] 32
## [1] 2485
## [1] "data"
## [1] "iTwiner"
## [1] 34
## [1] 33
## [1] 2496
## [1] "data"
## [1] "iTwiner"
## [1] 35
## [1] 34
## [1] 2507
## [1] "data"
## [1] "iTwiner"
## [1] 36
## [1] 35
## [1] 2518
## [1] "data"
## [1] "iTwiner"
## [1] 37
## [1] 36
## [1] 2529
## [1] "data"
## [1] "iTwiner"
## [1] 38
## [1] 37
## [1] 2540
## [1] "data"
## [1] "iTwiner"
## [1] 39
## [1] 38
## [1] 2551
## [1] "data"
## [1] "iTwiner"
## [1] 40
## [1] 39
## [1] 2562
## [1] "data"
## [1] "iTwiner"
## [1] 41
## [1] 40
## [1] 2573
## [1] "data"
## [1] "iTwiner"
## [1] 42
## [1] 41
## [1] 2584
## [1] "data"
## [1] "iTwiner"
## [1] 43
## [1] 42
## [1] 2595
## [1] "data"
## [1] "iTwiner"
## [1] 44
## [1] 43
## [1] 2606
## [1] "data"
## [1] "iTwiner"
## [1] 45
## [1] 44
## [1] 2617
## [1] "data"
## [1] "iTwiner"
## [1] 46
## [1] 45
## [1] 2628
## [1] "data"
## [1] "iTwiner"
## [1] 47
## [1] 46
## [1] 2639
## [1] "data"
## [1] "iTwiner"
## [1] 48
## [1] 47
## [1] 2650
## [1] "data"
## [1] "iTwiner"
## [1] 49
## [1] 48
## [1] 2661
## [1] "data"
## [1] "iTwiner"
## [1] 50
## [1] 49
## [1] 2672
## [1] "data"
## [1] "iTwiner"
## [1] 51
## [1] 50
## [1] 2683
## [1] "data"
## [1] "iTwiner"
## [1] 52
## [1] 51
## [1] 2694
## [1] "data"
## [1] "iTwiner"
## [1] 53
## [1] 52
## [1] 2705
## [1] "data"
## [1] "iTwiner"
## [1] 54
## [1] 53
## [1] 2716
## [1] "data"
## [1] "iTwiner"
## [1] 55
## [1] 54
## [1] 2727
## [1] "data"
## [1] "iTwiner"
## [1] 56
## [1] 55
## [1] 2738
## [1] "data"
## [1] "iTwiner"
## [1] 57
## [1] 56
## [1] 2749
## [1] "data"
## [1] "iTwiner"
## [1] 58
## [1] 57
## [1] 2760
## [1] "data"
## [1] "iTwiner"
## [1] 59
## [1] 58
## [1] 2771
## [1] "data"
## [1] "iTwiner"
## [1] 60
## [1] 59
## [1] 2782
## [1] "data"
## [1] "iTwiner"
## [1] 61
## [1] 60
## [1] 2793
## [1] "data"
## [1] "iTwiner"
## [1] 62
## [1] 61
## [1] 2804
## [1] "data"
## [1] "iTwiner"
## [1] 63
## [1] 62
## [1] 2815
## [1] "data"
## [1] "iTwiner"
## [1] 64
## [1] 63
## [1] 2826
## [1] "data"
## [1] "iTwiner"
## [1] 65
## [1] 64
## [1] 2837
## [1] "data"
## [1] "iTwiner"
## [1] 66
## [1] 65
## [1] 2848
## [1] "data"
## [1] "iTwiner"
## [1] 67
## [1] 66
## [1] 2859
## [1] "data"
## [1] "iTwiner"
## [1] 68
## [1] 67
## [1] 2870
## [1] "data"
## [1] "iTwiner"
## [1] 69
## [1] 68
## [1] 2881
## [1] "data"
## [1] "iTwiner"
## [1] 70
## [1] 69
## [1] 2892
## [1] "data"
## [1] "iTwiner"
## [1] 71
## [1] 70
## [1] 2903
## [1] "data"
## [1] "iTwiner"
## [1] 72
## [1] 71
## [1] 2914
## [1] "data"
## [1] "iTwiner"
## [1] 73
## [1] 72
## [1] 2925
## [1] "data"
## [1] "iTwiner"
## [1] 74
## [1] 73
## [1] 2936
## [1] "data"
## [1] "iTwiner"
## [1] 75
## [1] 74
## [1] 2947
## [1] "data"
## [1] "iTwiner"
## [1] 76
## [1] 75
## [1] 2958
## [1] "data"
## [1] "iTwiner"
## [1] 77
## [1] 76
## [1] 2969
## [1] "data"
## [1] "iTwiner"
## [1] 78
## [1] 77
## [1] 2980
## [1] "data"
## [1] "iTwiner"
## [1] 79
## [1] 78
## [1] 2991
## [1] "data"
## [1] "iTwiner"
## [1] 80
## [1] 79
## [1] 3002
## [1] "data"
## [1] "iTwiner"
## [1] 81
## [1] 80
## [1] 3013
## [1] "data"
## [1] "iTwiner"
## [1] 82
## [1] 81
## [1] 3024
## [1] "data"
## [1] "iTwiner"
## [1] 83
## [1] 82
## [1] 3035
## [1] "data"
## [1] "iTwiner"
## [1] 84
## [1] 83
## [1] 3046
## [1] "data"
## [1] "iTwiner"
## [1] 85
## [1] 84
## [1] 3057
## [1] "data"
## [1] "iTwiner"
## [1] 86
## [1] 85
## [1] 3068
## [1] "data"
## [1] "iTwiner"
## [1] 87
## [1] 86
## [1] 3079
## [1] "data"
## [1] "iTwiner"
## [1] 88
## [1] 87
## [1] 3090
## [1] "data"
## [1] "iTwiner"
## [1] 89
## [1] 88
## [1] 3101
## [1] "data"
## [1] "iTwiner"
## [1] 90
## [1] 89
## [1] 3112
## [1] "data"
## [1] "iTwiner"
## [1] 91
## [1] 90
## [1] 3123
## [1] "data"
## [1] "iTwiner"
## [1] 92
## [1] 91
## [1] 3134
## [1] "data"
## [1] "iTwiner"
## [1] 93
## [1] 92
## [1] 3145
## [1] "data"
## [1] "iTwiner"
## [1] 94
## [1] 93
## [1] 3156
## [1] "data"
## [1] "iTwiner"
## [1] 95
## [1] 94
## [1] 3167
## [1] "data"
## [1] "iTwiner"
## [1] 96
## [1] 95
## [1] 3178
## [1] "data"
## [1] "iTwiner"
## [1] 97
## [1] 96
## [1] 3189
## [1] "data"
## [1] "iTwiner"
## [1] 98
## [1] 97
## [1] 3200
## [1] "data"
## [1] "iTwiner"
## [1] 99
## [1] 98
## [1] 3211
## [1] "data"
## [1] "iTwiner"
## [1] 100
## [1] 99
## [1] 3222
## [1] "data"
## [1] "iTwiner"
# Column indices at which the stored accuracy is non-zero, for row 1 ("en")
# and row 2 ("itw") of acc_cox_tr and acc_cox_tes. These index vectors are
# used below to subset the per-run results.
# presumably a zero marks a run that produced no model - TODO confirm
runs_en_tr <- which(acc_cox_tr[1,]!=0)
runs_itw_tr <- which(acc_cox_tr[2,]!=0)
runs_en_ts <- which(acc_cox_tes[1,]!=0)
runs_itw_ts <- which(acc_cox_tes[2,]!=0)
# number of variables selected per retained run: mean, median and sd
# (row 1 of nvar_selected_1 is stored as "en", row 2 as "tw")
nvar_en <- nvar_selected_1[1,]
nvar_en <- nvar_en[runs_en_tr]
mean(nvar_en)
## [1] 66.97
median(nvar_en)
## [1] 59
sd(nvar_en)
## [1] 32.63092
nvar_tw <- nvar_selected_1[2,]
nvar_tw <- nvar_tw[runs_itw_tr]
mean(nvar_tw)
## [1] 36.54
median(nvar_tw)
## [1] 33
sd(nvar_tw)
## [1] 21.97539
# Accuracy summaries (mean, median, sd) over the retained runs.
# Row 1 of acc_cox_tr / acc_cox_tes is stored as EN, row 2 as iTwiner; the
# train rows are subset with runs_*_tr and the test rows with runs_*_ts.
# EN train
acc_cox_tr_EN <- acc_cox_tr[1,]
acc_cox_tr_EN <- acc_cox_tr_EN[runs_en_tr]
#hist(acc_cox_tr_EN)
mean(acc_cox_tr_EN)
## [1] 0.867381
median(acc_cox_tr_EN)
## [1] 0.8571429
sd(acc_cox_tr_EN)
## [1] 0.03285491
# EN test
acc_cox_tes_EN <- acc_cox_tes[1,]
acc_cox_tes_EN <- acc_cox_tes_EN[runs_en_ts]
#hist(acc_cox_tes_EN)
mean(acc_cox_tes_EN)
## [1] 0.6833333
median(acc_cox_tes_EN)
## [1] 0.6666667
sd(acc_cox_tes_EN)
## [1] 0.09259259
#iTwiner train
acc_cox_tr_iTwiner <- acc_cox_tr[2,]
acc_cox_tr_iTwiner <- acc_cox_tr_iTwiner[runs_itw_tr]
#hist(acc_cox_tr_iTwiner)
mean(acc_cox_tr_iTwiner)
## [1] 0.8511905
median(acc_cox_tr_iTwiner)
## [1] 0.8571429
sd(acc_cox_tr_iTwiner)
## [1] 0.02918513
#iTwiner test
acc_cox_tes_iTwiner <- acc_cox_tes[2,]
acc_cox_tes_iTwiner <- acc_cox_tes_iTwiner[runs_itw_ts]
#hist(acc_cox_tes_iTwiner)
mean(acc_cox_tes_iTwiner)
## [1] 0.7938889
median(acc_cox_tes_iTwiner)
## [1] 0.7777778
sd(acc_cox_tes_iTwiner)
## [1] 0.07461712
# Misclassification summaries (mean, median, sd) over the retained runs.
# Row 1 of miscl_cox_tr / miscl_cox_tes is stored as EN, row 2 as iTwiner.
# The recorded values look like per-run counts - TODO confirm.
# EN train
miscl_cox_tr_EN <- miscl_cox_tr[1,]
miscl_cox_tr_EN <- miscl_cox_tr_EN[runs_en_tr]
mean(miscl_cox_tr_EN)
## [1] 5.57
median(miscl_cox_tr_EN)
## [1] 6
sd(miscl_cox_tr_EN)
## [1] 1.379906
# EN test
miscl_cox_tes_EN <- miscl_cox_tes[1,]
miscl_cox_tes_EN <- miscl_cox_tes_EN[runs_en_ts]
mean(miscl_cox_tes_EN)
## [1] 5.7
median(miscl_cox_tes_EN)
## [1] 6
sd(miscl_cox_tes_EN)
## [1] 1.666667
#iTwiner train
miscl_cox_tr_iTwiner <- miscl_cox_tr[2,]
miscl_cox_tr_iTwiner <- miscl_cox_tr_iTwiner[runs_itw_tr]
mean(miscl_cox_tr_iTwiner)
## [1] 6.25
median(miscl_cox_tr_iTwiner)
## [1] 6
sd(miscl_cox_tr_iTwiner)
## [1] 1.225775
#iTwiner test
miscl_cox_tes_iTwiner <- miscl_cox_tes[2,]
miscl_cox_tes_iTwiner <- miscl_cox_tes_iTwiner[runs_itw_ts]
mean(miscl_cox_tes_iTwiner)
## [1] 3.71
median(miscl_cox_tes_iTwiner)
## [1] 4
sd(miscl_cox_tes_iTwiner)
## [1] 1.343108
# False-negative summaries (mean, median, sd) over the retained runs, read
# from fneg_reg_tr (train) and fneg_reg_ts (test); row 1 is stored as EN and
# row 2 as iTwiner. The recorded values look like per-run counts - TODO confirm.
# EN
fneg_reg_en_train <- fneg_reg_tr[1,runs_en_tr]
mean(fneg_reg_en_train)
## [1] 5.39
median(fneg_reg_en_train)
## [1] 5.5
sd(fneg_reg_en_train)
## [1] 1.270449
fneg_reg_en_test <- fneg_reg_ts[1,runs_en_ts]
mean(fneg_reg_en_test)
## [1] 3.1
median(fneg_reg_en_test)
## [1] 3
sd(fneg_reg_en_test)
## [1] 1.13262
# iTwiner
fneg_reg_iTwiner_train <- fneg_reg_tr[2,runs_itw_tr]
mean(fneg_reg_iTwiner_train)
## [1] 6.25
median(fneg_reg_iTwiner_train)
## [1] 6
sd(fneg_reg_iTwiner_train)
## [1] 1.225775
fneg_reg_iTwiner_test <- fneg_reg_ts[2,runs_itw_ts]
mean(fneg_reg_iTwiner_test)
## [1] 3.62
median(fneg_reg_iTwiner_test)
## [1] 4
sd(fneg_reg_iTwiner_test)
## [1] 1.361669
# Sensitivity summaries (mean, median, sd) over the retained runs.
# Row 1 of sensitivity_cox_tr / sensitivity_cox_tes is stored as EN, row 2 as
# iTwiner.
# EN train
sensitivity_cox_tr_EN <- sensitivity_cox_tr[1,]
sensitivity_cox_tr_EN <- sensitivity_cox_tr_EN[runs_en_tr]
mean(sensitivity_cox_tr_EN)
## [1] 0.7433333
median(sensitivity_cox_tr_EN)
## [1] 0.7380952
sd(sensitivity_cox_tr_EN)
## [1] 0.06049757
# EN test
sensitivity_cox_tes_EN <- sensitivity_cox_tes[1,]
sensitivity_cox_tes_EN <- sensitivity_cox_tes_EN[runs_en_ts]
mean(sensitivity_cox_tes_EN)
## [1] 0.6555556
median(sensitivity_cox_tes_EN)
## [1] 0.6666667
sd(sensitivity_cox_tes_EN)
## [1] 0.1258467
#iTwiner train
sensitivity_cox_tr_iTwiner <- sensitivity_cox_tr[2,]
sensitivity_cox_tr_iTwiner <- sensitivity_cox_tr_iTwiner[runs_itw_tr]
mean(sensitivity_cox_tr_iTwiner)
## [1] 0.702381
median(sensitivity_cox_tr_iTwiner)
## [1] 0.7142857
sd(sensitivity_cox_tr_iTwiner)
## [1] 0.05837026
#iTwiner test
sensitivity_cox_tes_iTwiner <- sensitivity_cox_tes[2,]
sensitivity_cox_tes_iTwiner <- sensitivity_cox_tes_iTwiner[runs_itw_ts]
mean(sensitivity_cox_tes_iTwiner)
## [1] 0.5977778
median(sensitivity_cox_tes_iTwiner)
## [1] 0.5555556
sd(sensitivity_cox_tes_iTwiner)
## [1] 0.1512965
# Specificity summaries (mean, median, sd) over the retained runs.
# Row 1 of specificity_cox_tr / specificity_cox_tes is stored as EN, row 2 as
# iTwiner.
# EN train
specificity_cox_tr_EN <- specificity_cox_tr[1,]
specificity_cox_tr_EN <- specificity_cox_tr_EN[runs_en_tr]
mean(specificity_cox_tr_EN)
## [1] 0.9914286
median(specificity_cox_tr_EN)
## [1] 1
sd(specificity_cox_tr_EN)
## [1] 0.02180604
# EN test
specificity_cox_tes_EN <- specificity_cox_tes[1,]
specificity_cox_tes_EN <- specificity_cox_tes_EN[runs_en_ts]
mean(specificity_cox_tes_EN)
## [1] 0.7111111
median(specificity_cox_tes_EN)
## [1] 0.6666667
sd(specificity_cox_tes_EN)
## [1] 0.1421338
#iTwiner train
specificity_cox_tr_iTwiner <- specificity_cox_tr[2,]
specificity_cox_tr_iTwiner <- specificity_cox_tr_iTwiner[runs_itw_tr]
mean(specificity_cox_tr_iTwiner)
## [1] 1
median(specificity_cox_tr_iTwiner)
## [1] 1
sd(specificity_cox_tr_iTwiner)
## [1] 0
#iTwiner test
specificity_cox_tes_iTwiner <- specificity_cox_tes[2,]
specificity_cox_tes_iTwiner <- specificity_cox_tes_iTwiner[runs_itw_ts]
mean(specificity_cox_tes_iTwiner)
## [1] 0.99
median(specificity_cox_tes_iTwiner)
## [1] 1
sd(specificity_cox_tes_iTwiner)
## [1] 0.03564733
# AUC summaries (mean, median, sd) over the retained runs.
# Row 1 of auc_cox_tr / auc_cox_tes is stored as EN, row 2 as iTwiner.
# NOTE(review): every value recorded below equals the matching acc_cox_*
# summary; confirm upstream that the AUC matrices are not copies of accuracy.
# EN train
auc_cox_tr_EN <- auc_cox_tr[1,]
auc_cox_tr_EN <- auc_cox_tr_EN[runs_en_tr]
mean(auc_cox_tr_EN)
## [1] 0.867381
median(auc_cox_tr_EN)
## [1] 0.8571429
sd(auc_cox_tr_EN)
## [1] 0.03285491
# EN test
auc_cox_tes_EN <- auc_cox_tes[1,]
auc_cox_tes_EN <- auc_cox_tes_EN[runs_en_ts]
mean(auc_cox_tes_EN)
## [1] 0.6833333
median(auc_cox_tes_EN)
## [1] 0.6666667
sd(auc_cox_tes_EN)
## [1] 0.09259259
#iTwiner train
auc_cox_tr_iTwiner <- auc_cox_tr[2,]
auc_cox_tr_iTwiner <- auc_cox_tr_iTwiner[runs_itw_tr]
mean(auc_cox_tr_iTwiner)
## [1] 0.8511905
median(auc_cox_tr_iTwiner)
## [1] 0.8571429
sd(auc_cox_tr_iTwiner)
## [1] 0.02918513
#iTwiner test
auc_cox_tes_iTwiner <- auc_cox_tes[2,]
auc_cox_tes_iTwiner <- auc_cox_tes_iTwiner[runs_itw_ts]
mean(auc_cox_tes_iTwiner)
## [1] 0.7938889
median(auc_cox_tes_iTwiner)
## [1] 0.7777778
sd(auc_cox_tes_iTwiner)
## [1] 0.07461712
Variables always selected (present in every retained run)
# EN: take the per-run selections of the retained runs, then fold them with
# intersect() so only variables present in every one of those runs remain.
var_selected_alw_select_en <- var_selected_en1[runs_en_ts]
var_selected_alw_select_en1 <- Reduce(
  function(shared, this_run) intersect(shared, this_run),
  var_selected_alw_select_en
)
print(paste(
  "variables always selected by EN = ",
  length(var_selected_alw_select_en1)
))
## [1] "variables always selected by EN = 0"
# iTwiner: same procedure on its own selections and retained runs.
var_selected_alw_select_iTwiner <- var_selected_iTwiner1[runs_itw_ts]
var_selected_alw_select_iTwiner1 <- Reduce(
  function(shared, this_run) intersect(shared, this_run),
  var_selected_alw_select_iTwiner
)
print(paste(
  "variables always selected by iTwiner = ",
  length(var_selected_alw_select_iTwiner1)
))
## [1] "variables always selected by iTwiner = 2"
Variables selected in more than 50% of the bootstrap samples
# EN: count how many retained runs selected each variable and keep those
# selected in more than half of the runs.
l <- length(var_selected_alw_select_en)
var_selected_50_select_en <- as.data.frame(
  table(unlist(var_selected_alw_select_en))
)
var_selected_50_select_en <- var_selected_50_select_en[
  var_selected_50_select_en$Freq > 0.50 * l,
]
print(paste(
  "variables selected 50% by EN = ",
  nrow(var_selected_50_select_en)
))
## [1] "variables selected 50% by EN = 8"
var_selected_50_select_en$Var1
## [1] CD8B LINC01871 PF4 RPL6P9 SFRP5 SLITRK1 SOHLH2
## [8] XRCC6P2
## 1815 Levels: AASDHPPT ABCC12 ABCD1P4 ABCE1 ABCF2-H2BE1 ABHD15 ABHD6 ACLY ... ZWINT
#
# iTwiner: same frequency threshold on its own retained runs.
l <- length(var_selected_alw_select_iTwiner)
var_selected_50_select_iTwiner <- as.data.frame(
  table(unlist(var_selected_alw_select_iTwiner))
)
var_selected_50_select_iTwiner <- var_selected_50_select_iTwiner[
  var_selected_50_select_iTwiner$Freq > 0.50 * l,
]
print(paste(
  "variables selected 50% by iTwiner = ",
  nrow(var_selected_50_select_iTwiner)
))
## [1] "variables selected 50% by iTwiner = 19"
var_selected_50_select_iTwiner$Var1
## [1] COX6CP7 EEF1B2P6 FABP7P2 HNRNPA1P42 HSPD1P7 IGKV2D-30
## [7] KDM4F LILRB1-AS1 LINC01100 LINC01335 MTCO2P18 OR51K1P
## [13] RAC1P3 RPL31P35 RPL7P58 SULT6B2P TRAJ20 TRBV11-1
## [19] XRCC6P2
## 236 Levels: ACTN4P1 AGTR2 ALOX15P2 AMMECR1LP1 ANKRD20A10P APOOP4 ... XRCC6P2
# Rank every variable each method ever selected by its selection frequency and
# export the 100 most frequent; the 50 most frequent are kept as top50_*.
# NOTE: this rebinds the var_selected_50_select_* names to the full, unfiltered
# frequency tables, so later code reading them sees all selected variables.
# head() returns at most the entries that exist, whereas `[1:100, ]` / `[1:50]`
# would pad a shorter table with NA entries.
var_selected_50_select_en <- table(unlist(var_selected_alw_select_en))
var_selected_50_select_en <- as.data.frame(var_selected_50_select_en)
var_selected_50_select_en <- var_selected_50_select_en[order(var_selected_50_select_en$Freq, decreasing = TRUE), ]
hist(var_selected_50_select_en$Freq)
top100_en <- head(var_selected_50_select_en, 100)
top100_en <- top100_en$Var1
top50_en <- head(top100_en, 50)
top100_en <- as.data.frame(top100_en)
write_xlsx(top100_en, "List_top100_en_d1.xlsx")
var_selected_50_select_iTwiner <- table(unlist(var_selected_alw_select_iTwiner))
var_selected_50_select_iTwiner <- as.data.frame(var_selected_50_select_iTwiner)
var_selected_50_select_iTwiner <- var_selected_50_select_iTwiner[order(var_selected_50_select_iTwiner$Freq, decreasing = TRUE), ]
hist(var_selected_50_select_iTwiner$Freq)
top100_itw <- head(var_selected_50_select_iTwiner, 100)
top100_itw <- top100_itw$Var1
top50_itw <- head(top100_itw, 50)
top100_itw <- as.data.frame(top100_itw)
write_xlsx(top100_itw, "List_top100_itw_d1.xlsx")
Variables in common between EN and iTwiner
# Variables of the iTwiner frequency table that also appear in the EN table.
# NOTE(review): both tables were rebound to the full frequency tables just
# before this point, so despite the "_50_" in the name this is the overlap of
# all variables ever selected, not of those above the 50% threshold - confirm
# which was intended.
common_var_selected_50_en_iTwiner <- var_selected_50_select_iTwiner$Var1[
  var_selected_50_select_iTwiner$Var1 %in% var_selected_50_select_en$Var1
]
length(common_var_selected_50_en_iTwiner)
## [1] 44
common_var_selected_50_en_iTwiner
## [1] RAC1P3 XRCC6P2 OR51K1P LINC01100 EEF1B2P6 HSPD1P7
## [7] FABP7P2 RPL31P35 COX6CP7 TRAJ20 RNU6-1085P MIR8052
## [13] TRBJ2-1 DNAJC19P3 RNU6-337P TRAJ61 MIR6816 TIMM8AP1
## [19] TRAJ25 MTCO3P42 RNU4-39P TMSB4XP1 HNRNPCL1 LINC02178
## [25] EIF4EP3 IGHJ1 MIR193B MIR4439 TRAJ28 ATP5MGP5
## [31] ATP6V1E1P2 MIR1288 MIR564 MIRLET7A2 MIR4254 BCRP7
## [37] GDI2P1 ISCA1P3 MIR409 PMPCAP1 RNA5SP74 RNU6-865P
## [43] RNU6ATAC7P SYCN
## 236 Levels: ACTN4P1 AGTR2 ALOX15P2 AMMECR1LP1 ANKRD20A10P APOOP4 ... XRCC6P2
# Count table restricted to the genes in the EN and iTwiner top-50 lists,
# with samples sorted by row name.
xdata <- rnaseq1[ order(row.names(rnaseq1)), ]
# unique() matters here: a gene present in both lists would otherwise be
# selected twice, and data-frame column indexing renames the repeat (e.g.
# "RAC1P3.1"), so the same counts would be tested as two separate genes.
# The results recorded further down (which list RAC1P3.1 and XRCC6P2.1) were
# produced before this de-duplication and need to be regenerated.
nomesgenes <- unique(c(as.vector(top50_en), as.vector(top50_itw)))
xdata <- xdata[,nomesgenes]
rownames(DATASET1_bal) <- DATASET1_bal$ID
ydata <- as.data.frame(DATASET1_bal[ order(row.names(DATASET1_bal)), ])
rownames(ydata) <- ydata$ID
xdata <- xdata[rownames(xdata) %in%
rownames(ydata),]
ydata <- as.data.frame(ydata[rownames(ydata) %in%
rownames(xdata),])
# keep features with standard deviation > 0
xdata <- xdata[,sapply(seq(ncol(xdata)), function(ix) {sd(xdata[,ix])}) != 0]
xdata <- t(xdata)
group <- as.factor(ydata$class)
class <- as.data.frame(ydata$class)
# Differential-expression test with edgeR (negative binomial GLM followed by
# a likelihood-ratio test on the last coefficient of the design).
edgeR.DGElist <- DGEList(counts = xdata, group = group)

# Retain genes reaching at least 1 count per million in 5 or more samples.
cpm_at_least_one <- cpm(edgeR.DGElist) >= 1
keep <- rowSums(cpm_at_least_one) >= 5
edgeR.DGElist <- edgeR.DGElist[keep, ]

# Design matrix: intercept plus the group effect.
design <- model.matrix(~ group)

# Dispersion estimation over all retained genes and samples.
edgeR.DGElist <- estimateDisp(edgeR.DGElist, design = design)

# Negative binomial GLM fit, then the per-gene likelihood-ratio test.
edger_fit <- glmFit(edgeR.DGElist, design = design)
edger_lrt <- glmLRT(edger_fit)
summary(decideTests(edger_lrt))
## groupPm
## Down 4
## NotSig 93
## Up 3

# Full result table sorted by p-value, with BH-adjusted p-values.
DGE.results_edgeR <- topTags(
  edger_lrt,
  n = Inf,
  adjust.method = "BH",
  sort.by = "PValue"
)
topTags(DGE.results_edgeR) # table with the top 10 DEGs
## Coefficient: groupPm
## logFC logCPM LR PValue FDR
## MIR8052 2.7372046 6.009474 19.669113 9.207769e-06 0.0006421211
## RNU6-337P 3.2286673 6.017764 19.033788 1.284242e-05 0.0006421211
## RAC1P3 -2.9946864 6.419308 13.309733 2.640319e-04 0.0066007975
## RAC1P3.1 -2.9946864 6.419308 13.309733 2.640319e-04 0.0066007975
## XRCC6P2 -3.3430257 6.364429 10.601001 1.130265e-03 0.0188377469
## XRCC6P2.1 -3.3430257 6.364429 10.601001 1.130265e-03 0.0188377469
## RNU6-1266P 3.1010187 7.350505 9.753307 1.790004e-03 0.0255714897
## MGST1 0.6999765 16.087821 6.929356 8.479222e-03 0.1059902710
## MIR7107 2.8806668 7.217542 6.048031 1.392181e-02 0.1546868129
## EEF1B2P6 -2.0438206 6.049605 5.732342 1.665525e-02 0.1612076403
# Genes with FDR below 0.05 are treated as differentially expressed.
genes_deg <- DGE.results_edgeR$table
genes_deg <- subset(genes_deg, FDR < 0.05)
dim(genes_deg) # number of genes found to be differentially expressed
## [1] 7 5

# write_xlsx() does not export row names, so copy the gene names to a column.
genes_deg$row <- rownames(genes_deg)
write_xlsx(genes_deg, "genes_deg_enitw_d1.xlsx")

# Genes with a positive log fold change.
high <- with(genes_deg, genes_deg[logFC > 0, ])
dim(high)
## [1] 3 6

# Genes with a negative log fold change.
low <- with(genes_deg, genes_deg[logFC < 0, ])
dim(low)
## [1] 4 6
# Prepare the predictor table (xdata_en) and class labels (ydata) for the
# classifiers that use the EN top-50 genes.
xdata <- xdataT

# One row per sample: class label plus sample ID, indexed by sample ID.
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
# ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the labels: "P" becomes 0 and "Pm" becomes 1.
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID

# Keep the samples present in both tables and sort both by sample ID so that
# rows line up.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
colnames(ydata) <- c("class", "id")

# Restrict the predictors to the EN genes; the original gene names are kept
# in nomesgenes because the columns are renamed to Var1, Var2, ...
names <- as.vector(top50_en)
xdata_en <- as.data.frame(xdata[, names])
nomesgenes <- colnames(xdata_en)
# Number the columns from the actual column count rather than a fixed 1:50,
# so a shorter gene list does not break the renaming.
colnames(xdata_en) <- paste0("Var", seq_len(ncol(xdata_en)))
xdata_en$type <- as.factor(ydata$class)
ydata$class <- as.numeric(ydata$class)
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# Summary of the *_enplus_train metric matrices (presumably training-set
# results for the EN-selected genes -- confirm against the code that fills
# them). Rows 1 to 5 of each matrix are stored under the suffixes trees, svm,
# svmR, logs and rf. For each row the mean, median and standard deviation are
# printed in that order; the values recorded from the original run are kept
# in the "##" line under each call. hist(<vector>) can be used to inspect a
# distribution.

# Print mean, median and sd of one row of a metric matrix and return the row
# invisibly so it can be stored under its own name.
print_row_stats <- function(metric, row) {
  values <- metric[row, ]
  for (stat in list(mean, median, sd)) {
    print(stat(values))
  }
  invisible(values)
}

# acc
acc_trees <- print_row_stats(acc_enplus_train, 1)
## mean 0.9902381 | median 1 | sd 0.01439552
acc_svm <- print_row_stats(acc_enplus_train, 2)
## mean 0.9288095 | median 0.9285714 | sd 0.02356658
acc_svmR <- print_row_stats(acc_enplus_train, 3)
## mean 0.8940476 | median 0.9285714 | sd 0.08748996
acc_logs <- print_row_stats(acc_enplus_train, 4)
## mean 1 | median 1 | sd 0
acc_rf <- print_row_stats(acc_enplus_train, 5)
## mean 1 | median 1 | sd 0

# auc
# NOTE(review): every recorded auc value equals the matching acc value --
# confirm how the auc matrix is filled.
auc_trees <- print_row_stats(auc_enplus_train, 1)
## mean 0.9902381 | median 1 | sd 0.01439552
auc_svm <- print_row_stats(auc_enplus_train, 2)
## mean 0.9288095 | median 0.9285714 | sd 0.02356658
auc_svmR <- print_row_stats(auc_enplus_train, 3)
## mean 0.8940476 | median 0.9285714 | sd 0.08748996
auc_logs <- print_row_stats(auc_enplus_train, 4)
## mean 1 | median 1 | sd 0
auc_rf <- print_row_stats(auc_enplus_train, 5)
## mean 1 | median 1 | sd 0

# miscl
miscl_trees <- print_row_stats(miscl_enplus_train, 1)
## mean 0.41 | median 0 | sd 0.6046119
miscl_svm <- print_row_stats(miscl_enplus_train, 2)
## mean 2.99 | median 3 | sd 0.9897964
miscl_svmR <- print_row_stats(miscl_enplus_train, 3)
## mean 4.45 | median 3 | sd 3.674578
miscl_logs <- print_row_stats(miscl_enplus_train, 4)
## mean 0 | median 0 | sd 0
miscl_rf <- print_row_stats(miscl_enplus_train, 5)
## mean 0 | median 0 | sd 0

# sensitivity
sensitivity_trees <- print_row_stats(sensitivity_enplus_train, 1)
## mean 0.99 | median 1 | sd 0.01949334
sensitivity_svm <- print_row_stats(sensitivity_enplus_train, 2)
## mean 0.857619 | median 0.8571429 | sd 0.04713316
sensitivity_svmR <- print_row_stats(sensitivity_enplus_train, 3)
## mean 0.8771429 | median 0.8571429 | sd 0.06841653
sensitivity_logs <- print_row_stats(sensitivity_enplus_train, 4)
## mean 1 | median 1 | sd 0
sensitivity_rf <- print_row_stats(sensitivity_enplus_train, 5)
## mean 1 | median 1 | sd 0

# specificity
specificity_trees <- print_row_stats(specificity_enplus_train, 1)
## mean 0.9904762 | median 1 | sd 0.01914358
specificity_svm <- print_row_stats(specificity_enplus_train, 2)
## mean 1 | median 1 | sd 0
specificity_svmR <- print_row_stats(specificity_enplus_train, 3)
## mean 0.9109524 | median 1 | sd 0.1542892
specificity_logs <- print_row_stats(specificity_enplus_train, 4)
## mean 1 | median 1 | sd 0
specificity_rf <- print_row_stats(specificity_enplus_train, 5)
## mean 1 | median 1 | sd 0

# fneg
fneg_trees <- print_row_stats(fneg_enplus_train, 1)
## mean 0.21 | median 0 | sd 0.4093602
fneg_svm <- print_row_stats(fneg_enplus_train, 2)
## mean 2.99 | median 3 | sd 0.9897964
fneg_svmR <- print_row_stats(fneg_enplus_train, 3)
## mean 2.58 | median 3 | sd 1.436747
fneg_logs <- print_row_stats(fneg_enplus_train, 4)
## mean 0 | median 0 | sd 0
fneg_rf <- print_row_stats(fneg_enplus_train, 5)
## mean 0 | median 0 | sd 0
# Summary of the *_enplus metric matrices (presumably held-out results for
# the EN-selected genes -- confirm against the code that fills them). Rows 1
# to 5 of each matrix are stored under the suffixes trees, svm, svmR, logs
# and rf, replacing any earlier vectors with the same names. For each row the
# mean, median and standard deviation are printed in that order; the values
# recorded from the original run are kept in the "##" line under each call.

# Print mean, median and sd of one row of a metric matrix and return the row
# invisibly so it can be stored under its own name.
print_row_stats <- function(metric, row) {
  values <- metric[row, ]
  for (stat in list(mean, median, sd)) {
    print(stat(values))
  }
  invisible(values)
}

# acc
acc_trees <- print_row_stats(acc_enplus, 1)
## mean 0.7133333 | median 0.7222222 | sd 0.0943338
acc_svm <- print_row_stats(acc_enplus, 2)
## mean 0.7166667 | median 0.7222222 | sd 0.08148913
acc_svmR <- print_row_stats(acc_enplus, 3)
## mean 0.7544444 | median 0.7777778 | sd 0.1008691
acc_logs <- print_row_stats(acc_enplus, 4)
## mean 0.7094444 | median 0.7222222 | sd 0.09762583
acc_rf <- print_row_stats(acc_enplus, 5)
## mean 0.7905556 | median 0.7777778 | sd 0.0956906

# auc
# NOTE(review): every recorded auc value equals the matching acc value --
# confirm how the auc matrix is filled.
auc_trees <- print_row_stats(auc_enplus, 1)
## mean 0.7133333 | median 0.7222222 | sd 0.0943338
auc_svm <- print_row_stats(auc_enplus, 2)
## mean 0.7166667 | median 0.7222222 | sd 0.08148913
auc_svmR <- print_row_stats(auc_enplus, 3)
## mean 0.7544444 | median 0.7777778 | sd 0.1008691
auc_logs <- print_row_stats(auc_enplus, 4)
## mean 0.7094444 | median 0.7222222 | sd 0.09762583
auc_rf <- print_row_stats(auc_enplus, 5)
## mean 0.7905556 | median 0.7777778 | sd 0.0956906

# miscl
miscl_trees <- print_row_stats(miscl_enplus, 1)
## mean 5.16 | median 5 | sd 1.698008
miscl_svm <- print_row_stats(miscl_enplus, 2)
## mean 5.1 | median 5 | sd 1.466804
miscl_svmR <- print_row_stats(miscl_enplus, 3)
## mean 4.42 | median 4 | sd 1.815645
miscl_logs <- print_row_stats(miscl_enplus, 4)
## mean 5.23 | median 5 | sd 1.757265
miscl_rf <- print_row_stats(miscl_enplus, 5)
## mean 3.77 | median 4 | sd 1.722431

# sensitivity
sensitivity_trees <- print_row_stats(sensitivity_enplus, 1)
## mean 0.7233333 | median 0.7777778 | sd 0.1390246
sensitivity_svm <- print_row_stats(sensitivity_enplus, 2)
## mean 0.5888889 | median 0.5555556 | sd 0.1598884
sensitivity_svmR <- print_row_stats(sensitivity_enplus, 3)
## mean 0.8133333 | median 0.7777778 | sd 0.1159228
sensitivity_logs <- print_row_stats(sensitivity_enplus, 4)
## mean 0.6688889 | median 0.6666667 | sd 0.139459
sensitivity_rf <- print_row_stats(sensitivity_enplus, 5)
## mean 0.7077778 | median 0.6666667 | sd 0.126986

# specificity
specificity_trees <- print_row_stats(specificity_enplus, 1)
## mean 0.7033333 | median 0.6666667 | sd 0.1595409
specificity_svm <- print_row_stats(specificity_enplus, 2)
## mean 0.8444444 | median 0.8888889 | sd 0.1181813
specificity_svmR <- print_row_stats(specificity_enplus, 3)
## mean 0.6955556 | median 0.6666667 | sd 0.1957396
specificity_logs <- print_row_stats(specificity_enplus, 4)
## mean 0.75 | median 0.7777778 | sd 0.1558399
specificity_rf <- print_row_stats(specificity_enplus, 5)
## mean 0.8733333 | median 0.8888889 | sd 0.127344

# fneg
fneg_trees <- print_row_stats(fneg_enplus, 1)
## mean 2.49 | median 2 | sd 1.251222
fneg_svm <- print_row_stats(fneg_enplus, 2)
## mean 3.7 | median 4 | sd 1.438995
fneg_svmR <- print_row_stats(fneg_enplus, 3)
## mean 1.68 | median 2 | sd 1.043305
fneg_logs <- print_row_stats(fneg_enplus, 4)
## mean 2.98 | median 3 | sd 1.255131
fneg_rf <- print_row_stats(fneg_enplus, 5)
## mean 2.63 | median 3 | sd 1.142874
# Prepare the predictor table (xdata_iTwiner) and class labels (ydata) for
# the classifiers that use the iTwiner top-50 genes.
xdata <- xdataT

# One row per sample: class label plus sample ID, indexed by sample ID.
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
# ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the labels: "P" becomes 0 and "Pm" becomes 1.
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID

# Keep the samples present in both tables and sort both by sample ID so that
# rows line up.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
colnames(ydata) <- c("class", "id")

# Restrict the predictors to the iTwiner genes; the original gene names are
# kept in nomesgenes because the columns are renamed to Var1, Var2, ...
names <- as.vector(top50_itw)
xdata_iTwiner <- as.data.frame(xdata[, names])
nomesgenes <- colnames(xdata_iTwiner)
# Number the columns from the actual column count rather than a fixed 1:50,
# so a shorter gene list does not break the renaming.
colnames(xdata_iTwiner) <- paste0("Var", seq_len(ncol(xdata_iTwiner)))
xdata_iTwiner$type <- as.factor(ydata$class)
ydata$class <- as.numeric(ydata$class)
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# Summary of the *_tcoxplus_train metric matrices (presumably training-set
# results for the iTwiner-selected genes -- confirm against the code that
# fills them). Rows 1 to 5 of each matrix are stored under the suffixes
# trees, svm, svmR, logs and rf, replacing any earlier vectors with the same
# names. For each row the mean, median and standard deviation are printed in
# that order; the values recorded from the original run are kept in the "##"
# line under each call.

# Print mean, median and sd of one row of a metric matrix and return the row
# invisibly so it can be stored under its own name.
print_row_stats <- function(metric, row) {
  values <- metric[row, ]
  for (stat in list(mean, median, sd)) {
    print(stat(values))
  }
  invisible(values)
}

# acc
acc_trees <- print_row_stats(acc_tcoxplus_train, 1)
## mean 0.9952381 | median 1 | sd 0.01070159
acc_svm <- print_row_stats(acc_tcoxplus_train, 2)
## mean 0.9090476 | median 0.9047619 | sd 0.02449237
acc_svmR <- print_row_stats(acc_tcoxplus_train, 3)
## mean 0.9178571 | median 0.9166667 | sd 0.05502478
acc_logs <- print_row_stats(acc_tcoxplus_train, 4)
## mean 0.9985714 | median 1 | sd 0.005682935
acc_rf <- print_row_stats(acc_tcoxplus_train, 5)
## mean 0.9695238 | median 0.9761905 | sd 0.01590185

# auc
# NOTE(review): every recorded auc value equals the matching acc value --
# confirm how the auc matrix is filled.
auc_trees <- print_row_stats(auc_tcoxplus_train, 1)
## mean 0.9952381 | median 1 | sd 0.01070159
auc_svm <- print_row_stats(auc_tcoxplus_train, 2)
## mean 0.9090476 | median 0.9047619 | sd 0.02449237
auc_svmR <- print_row_stats(auc_tcoxplus_train, 3)
## mean 0.9178571 | median 0.9166667 | sd 0.05502478
auc_logs <- print_row_stats(auc_tcoxplus_train, 4)
## mean 0.9985714 | median 1 | sd 0.005682935
auc_rf <- print_row_stats(auc_tcoxplus_train, 5)
## mean 0.9695238 | median 0.9761905 | sd 0.01590185

# miscl
miscl_trees <- print_row_stats(miscl_tcoxplus_train, 1)
## mean 0.2 | median 0 | sd 0.4494666
miscl_svm <- print_row_stats(miscl_tcoxplus_train, 2)
## mean 3.82 | median 4 | sd 1.02868
miscl_svmR <- print_row_stats(miscl_tcoxplus_train, 3)
## mean 3.45 | median 3.5 | sd 2.311041
miscl_logs <- print_row_stats(miscl_tcoxplus_train, 4)
## mean 0.06 | median 0 | sd 0.2386833
miscl_rf <- print_row_stats(miscl_tcoxplus_train, 5)
## mean 1.28 | median 1 | sd 0.6678777

# sensitivity
sensitivity_trees <- print_row_stats(sensitivity_tcoxplus_train, 1)
## mean 0.9985714 | median 1 | sd 0.008164124
sensitivity_svm <- print_row_stats(sensitivity_tcoxplus_train, 2)
## mean 0.8185714 | median 0.8095238 | sd 0.04913182
sensitivity_svmR <- print_row_stats(sensitivity_tcoxplus_train, 3)
## mean 0.8642857 | median 0.8571429 | sd 0.08188636
sensitivity_logs <- print_row_stats(sensitivity_tcoxplus_train, 4)
## mean 0.997619 | median 1 | sd 0.01043061
sensitivity_rf <- print_row_stats(sensitivity_tcoxplus_train, 5)
## mean 0.9390476 | median 0.952381 | sd 0.0318037

# specificity
specificity_trees <- print_row_stats(specificity_tcoxplus_train, 1)
## mean 0.9919048 | median 1 | sd 0.02036678
specificity_svm <- print_row_stats(specificity_tcoxplus_train, 2)
## mean 0.9995238 | median 1 | sd 0.004761905
specificity_svmR <- print_row_stats(specificity_tcoxplus_train, 3)
## mean 0.9714286 | median 1 | sd 0.06015776
specificity_logs <- print_row_stats(specificity_tcoxplus_train, 4)
## mean 0.9995238 | median 1 | sd 0.004761905
specificity_rf <- print_row_stats(specificity_tcoxplus_train, 5)
## mean 1 | median 1 | sd 0

# fneg
fneg_trees <- print_row_stats(fneg_tcoxplus_train, 1)
## mean 0.03 | median 0 | sd 0.1714466
fneg_svm <- print_row_stats(fneg_tcoxplus_train, 2)
## mean 3.81 | median 4 | sd 1.031768
fneg_svmR <- print_row_stats(fneg_tcoxplus_train, 3)
## mean 2.85 | median 3 | sd 1.719614
fneg_logs <- print_row_stats(fneg_tcoxplus_train, 4)
## mean 0.05 | median 0 | sd 0.2190429
fneg_rf <- print_row_stats(fneg_tcoxplus_train, 5)
## mean 1.28 | median 1 | sd 0.6678777
# Summary of the *_tcoxplus metric matrices (presumably held-out results for
# the iTwiner-selected genes -- confirm against the code that fills them).
# Rows 1 to 5 of each matrix are stored under the suffixes trees, svm, svmR,
# logs and rf, replacing any earlier vectors with the same names. For each
# row the mean, median and standard deviation are printed in that order; the
# values recorded from the original run are kept in the "##" line under each
# call.

# Print mean, median and sd of one row of a metric matrix and return the row
# invisibly so it can be stored under its own name.
print_row_stats <- function(metric, row) {
  values <- metric[row, ]
  for (stat in list(mean, median, sd)) {
    print(stat(values))
  }
  invisible(values)
}

# acc
acc_trees <- print_row_stats(acc_tcoxplus, 1)
## mean 0.7672222 | median 0.7777778 | sd 0.09563845
acc_svm <- print_row_stats(acc_tcoxplus, 2)
## mean 0.8261111 | median 0.8333333 | sd 0.07135367
acc_svmR <- print_row_stats(acc_tcoxplus, 3)
## mean 0.7961111 | median 0.7777778 | sd 0.08900631
acc_logs <- print_row_stats(acc_tcoxplus, 4)
## mean 0.7255556 | median 0.7222222 | sd 0.09370377
acc_rf <- print_row_stats(acc_tcoxplus, 5)
## mean 0.8627778 | median 0.8611111 | sd 0.06290104

# auc
# NOTE(review): every recorded auc value equals the matching acc value --
# confirm how the auc matrix is filled.
auc_trees <- print_row_stats(auc_tcoxplus, 1)
## mean 0.7672222 | median 0.7777778 | sd 0.09563845
auc_svm <- print_row_stats(auc_tcoxplus, 2)
## mean 0.8261111 | median 0.8333333 | sd 0.07135367
auc_svmR <- print_row_stats(auc_tcoxplus, 3)
## mean 0.7961111 | median 0.7777778 | sd 0.08900631
auc_logs <- print_row_stats(auc_tcoxplus, 4)
## mean 0.7255556 | median 0.7222222 | sd 0.09370377
auc_rf <- print_row_stats(auc_tcoxplus, 5)
## mean 0.8627778 | median 0.8611111 | sd 0.06290104

# miscl
miscl_trees <- print_row_stats(miscl_tcoxplus, 1)
## mean 4.19 | median 4 | sd 1.721492
miscl_svm <- print_row_stats(miscl_tcoxplus, 2)
## mean 3.13 | median 3 | sd 1.284366
miscl_svmR <- print_row_stats(miscl_tcoxplus, 3)
## mean 3.67 | median 4 | sd 1.602114
miscl_logs <- print_row_stats(miscl_tcoxplus, 4)
## mean 4.94 | median 5 | sd 1.686668
miscl_rf <- print_row_stats(miscl_tcoxplus, 5)
## mean 2.47 | median 2.5 | sd 1.132219

# sensitivity
sensitivity_trees <- print_row_stats(sensitivity_tcoxplus, 1)
## mean 0.7255556 | median 0.7777778 | sd 0.1362705
sensitivity_svm <- print_row_stats(sensitivity_tcoxplus, 2)
## mean 0.6566667 | median 0.6666667 | sd 0.1430913
sensitivity_svmR <- print_row_stats(sensitivity_tcoxplus, 3)
## mean 0.7933333 | median 0.7777778 | sd 0.1128042
sensitivity_logs <- print_row_stats(sensitivity_tcoxplus, 4)
## mean 0.6433333 | median 0.6666667 | sd 0.1589457
sensitivity_rf <- print_row_stats(sensitivity_tcoxplus, 5)
## mean 0.7288889 | median 0.7777778 | sd 0.1226551

# specificity
specificity_trees <- print_row_stats(specificity_tcoxplus, 1)
## mean 0.8088889 | median 0.7777778 | sd 0.1456695
specificity_svm <- print_row_stats(specificity_tcoxplus, 2)
## mean 0.9955556 | median 1 | sd 0.02188293
specificity_svmR <- print_row_stats(specificity_tcoxplus, 3)
## mean 0.7988889 | median 0.7777778 | sd 0.1528668
specificity_logs <- print_row_stats(specificity_tcoxplus, 4)
## mean 0.8077778 | median 0.7777778 | sd 0.1300906
specificity_rf <- print_row_stats(specificity_tcoxplus, 5)
## mean 0.9966667 | median 1 | sd 0.02474461

# fneg
fneg_trees <- print_row_stats(fneg_tcoxplus, 1)
## mean 2.47 | median 2 | sd 1.226434
fneg_svm <- print_row_stats(fneg_tcoxplus, 2)
## mean 3.09 | median 3 | sd 1.287822
fneg_svmR <- print_row_stats(fneg_tcoxplus, 3)
## mean 1.86 | median 2 | sd 1.015237
fneg_logs <- print_row_stats(fneg_tcoxplus, 4)
## mean 3.21 | median 3 | sd 1.430512
fneg_rf <- print_row_stats(fneg_tcoxplus, 5)
## mean 2.44 | median 2 | sd 1.103896
# save results
#save.image("~/results1_2010_final.RData")
# Long-format accuracy tables for DATASET1, one per classifier (rows 1 to 5
# of the accuracy matrices, labelled DT, svmL, svmR, logist and rf). Each
# table stacks the accuracies from `acc`, `acc_enplus` and `acc_tcoxplus`
# and labels them "<label>", "EN + <label>" and "iTwiner + <label>".
# A fourth "HUB + <label>" group (from acc_hubplus) is currently disabled.

# Stack three accuracy vectors into a data frame with columns `acc`, `group`
# (ordered factor, levels in the order base < EN < iTwiner) and `dataset`.
# Group sizes come from the vector lengths instead of fixed row ranges
# (1:100, 101:200, 201:300), so the labels stay correct for any run count.
make_acc_frame <- function(base, en, itw, label, dataset = "DATASET1") {
  group_levels <- c(label, paste("EN +", label), paste("iTwiner +", label))
  out <- as.data.frame(c(base, en, itw))
  colnames(out) <- "acc"
  out$group <- factor(
    rep(group_levels, times = c(length(base), length(en), length(itw))),
    levels = group_levels,
    ordered = TRUE
  )
  out$dataset <- dataset
  out
}

dt1 <- acc[1, ]
dt_en1 <- acc_enplus[1, ]
dt_iTwiner1 <- acc_tcoxplus[1, ]
acc_dt1 <- make_acc_frame(dt1, dt_en1, dt_iTwiner1, "DT")

svmL1 <- acc[2, ]
svmL_en1 <- acc_enplus[2, ]
svmL_iTwiner1 <- acc_tcoxplus[2, ]
acc_svmL1 <- make_acc_frame(svmL1, svmL_en1, svmL_iTwiner1, "svmL")

svmR1 <- acc[3, ]
svmR_en1 <- acc_enplus[3, ]
svmR_iTwiner1 <- acc_tcoxplus[3, ]
acc_svmR1 <- make_acc_frame(svmR1, svmR_en1, svmR_iTwiner1, "svmR")

logist1 <- acc[4, ]
logist_en1 <- acc_enplus[4, ]
logist_iTwiner1 <- acc_tcoxplus[4, ]
acc_logist1 <- make_acc_frame(logist1, logist_en1, logist_iTwiner1, "logist")

rf1 <- acc[5, ]
rf_en1 <- acc_enplus[5, ]
rf_iTwiner1 <- acc_tcoxplus[5, ]
acc_rf1 <- make_acc_frame(rf1, rf_en1, rf_iTwiner1, "rf")
# Restore the objects saved in the results file of the second run. load()
# places every stored object into the current environment and silently
# overwrites existing objects that share a name.
load("~/CRC_LCosta/results/results2_2010_final.RData")
# Re-point the "1"-suffixed working names at the second dataset's objects,
# presumably so the code written for the first dataset can be reused as is.
# NOTE(review): DATASET1_bal receives df2 while DATASET1 receives the
# "_bal" object, and clinic1 and clinic1_bal both receive clinic2_bal --
# confirm these assignments are intended and not swapped.
DATASET1_bal <- df2
DATASET1 <- DATASET2_bal
clinic1 <- clinic2_bal
datasurv1 <- datasurv2
clinic1_bal <- clinic2_bal
# primM <- DATASET1 %>%
# filter(str_detect(class, "m"))
# rownames(primM) <- primM$ID
# primN <- DATASET1 %>%
# filter(!str_detect(class, "Pm"))
# rownames(primN) <- primN$ID
#
# clinic_prim <- DATASET1
#
# rnaprimM <- rnaseq1[rownames(rnaseq1) %in%
# rownames(primM),]
#
# rnaprimN <- rnaseq1[rownames(rnaseq1) %in%
# rownames(primN),]
#
# rnaprim <- rbind(rnaprimM,rnaprimN)
# xmet <- rnaprimM [,sapply(seq(ncol(rnaprimM)), function(ix) {sd(rnaprimM[,ix])}) != 0]
# xnon <- rnaprimN[,sapply(seq(ncol(rnaprimN)), function(ix) {sd(rnaprimN[,ix])}) != 0]
#
# xmet_less <- xmet[,which(colnames(xmet) %in% colnames(xnon))]
# xnon_less <- xnon[,which(colnames(xnon) %in% colnames(xmet))]
#
# # normalizing data
# xmet_norm <- scale(log2(xmet_less+1))
# xnon_norm <- scale(log2(xnon_less+1))
#
# xdataT <- rbind(xmet_less,xnon_less)
# xdataT <- xdataT[ order(row.names(xdataT)), ]
#
#
# rm(xmet,xmet_less,xnon,xnon_less,rnaprimM,rnaprimN)
# #xmet_cor <- Matrix(cor(xmet_norm), sparse = TRUE)
# xmet_cor <- cor(xmet_norm)
# #xmet_cor <- as.data.frame(xmet_cor)
# xnon_cor <- cor(xnon_norm)
# #xnon_cor <- as.data.frame(xnon_cor)
#
# # angular distance
# ang_weight <- vector()
# for (i in 1:dim(xmet_cor)[2]){
# ang_weight[i] <- acos(cosine(xmet_cor[,i],xnon_cor[,i]))/pi
# }
#
# ## normalized weights
#
# weights <- ang_weight / max(ang_weight)
# hist(weights,main="w")
#
#
# pen_weight2 <- 1 / weights
# hist(pen_weight2, main="1 / w")
#
# rm(xmet_cor,xnon_cor)
clinical <- as.data.frame(clinic1_bal)

# Print a quick exploratory overview of a clinical data frame.
#
# clinical: data frame to summarise.
#
# Calls glimpse(), freq(), profiling_num() and plot_num() on `clinical` for
# their printed/plotted output, then returns the result of
# describe(clinical), which auto-prints when the function is called at top
# level.
basic_eda <- function(clinical) {
  glimpse(clinical)
  # df_Status(clinical)
  freq(clinical)
  # Inside a function a bare value is not auto-printed, so the numeric
  # profile has to be printed explicitly or it is silently discarded.
  print(profiling_num(clinical))
  plot_num(clinical)
  # Describe the argument, not the global `clinic1`.
  describe(clinical)
}
basic_eda(clinical)
## Rows: 55
## Columns: 6
## $ class <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", …
## $ organ <chr> "colon", "colon", "rectum", "colon", "colon", "colon", "colo…
## $ Sex <chr> "f", "m", "m", "m", "m", "f", "f", "m", "f", "m", "m", "f", …
## $ Age <dbl> 73, 59, 59, 81, 78, 75, 61, 73, 36, 70, 72, 71, 75, 78, 60, …
## $ Stage <chr> "II", "II", "II", "II", "III", "II", "II", "III", "II", "III…
## $ sidedness <chr> "right", "right", "rectum", "right", "left", "right", "left"…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 Pm 30 54.55 54.55
## 2 P 25 45.45 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 49 89.09 89.09
## 2 rectum 6 10.91 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 30 54.55 54.55
## 2 m 25 45.45 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 III 31 56.36 56.36
## 2 II 24 43.64 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 right 23 41.82 41.82
## 2 left 21 38.18 80.00
## 3 rectum 6 10.91 90.91
## 4 <NA> 5 9.09 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinic1
##
## 6 Variables 55 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct
## 55 0 2
##
## Value P Pm
## Frequency 25 30
## Proportion 0.455 0.545
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 55 0 2
##
## Value colon rectum
## Frequency 49 6
## Proportion 0.891 0.109
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 55 0 2
##
## Value f m
## Frequency 30 25
## Proportion 0.545 0.455
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 47 8 26 0.998 67.77 13.33 42.5 56.6
## .25 .50 .75 .90 .95
## 59.5 70.0 75.0 81.0 85.0
##
## lowest : 36 37 38 53 56, highest: 78 81 85 86 87
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 55 0 2
##
## Value II III
## Frequency 24 31
## Proportion 0.436 0.564
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 50 5 3
##
## Value left rectum right
## Frequency 21 6 23
## Proportion 0.42 0.12 0.46
## --------------------------------------------------------------------------------
a <- na.omit(clinical$Age)
mean(a)
## [1] 67.76596
# Subset of patients whose class label does not contain "Pm".
clinical_p <- filter(clinic1_bal, !str_detect(class, "Pm"))

# Exploratory summary helper, re-declared for this subset: prints the
# structure, frequency tables and numeric profiles, and returns the
# describe() result for the data frame it receives.
basic_eda <- function(clinical_p) {
  glimpse(clinical_p)
  # df_Status(clinical_p)
  freq(clinical_p)
  profiling_num(clinical_p)
  plot_num(clinical_p)
  describe(clinical_p)
}

basic_eda(clinical_p)
## Rows: 25
## Columns: 6
## $ class <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", …
## $ organ <chr> "colon", "colon", "rectum", "colon", "colon", "colon", "colo…
## $ Sex <chr> "f", "m", "m", "m", "m", "f", "f", "m", "f", "m", "m", "f", …
## $ Age <dbl> 73, 59, 59, 81, 78, 75, 61, 73, 36, 70, 72, 71, 75, 78, 60, …
## $ Stage <chr> "II", "II", "II", "II", "III", "II", "II", "III", "II", "III…
## $ sidedness <chr> "right", "right", "rectum", "right", "left", "right", "left"…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 P 25 100 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 24 96 96
## 2 rectum 1 4 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 13 52 52
## 2 m 12 48 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 II 14 56 56
## 2 III 11 44 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 right 14 56 56
## 2 left 8 32 88
## 3 <NA> 2 8 96
## 4 rectum 1 4 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinical_p
##
## 6 Variables 25 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct value
## 25 0 1 P
##
## Value P
## Frequency 25
## Proportion 1
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 25 0 2
##
## Value colon rectum
## Frequency 24 1
## Proportion 0.96 0.04
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 25 0 2
##
## Value f m
## Frequency 13 12
## Proportion 0.52 0.48
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 21 4 14 0.993 66.52 12.83 38 57
## .25 .50 .75 .90 .95
## 60 71 75 78 78
##
## lowest : 36 38 57 59 60, highest: 72 73 75 78 81
##
## Value 36 38 57 59 60 61 63 70 71 72 73
## Frequency 1 1 1 2 1 1 1 2 1 1 3
## Proportion 0.048 0.048 0.048 0.095 0.048 0.048 0.048 0.095 0.048 0.048 0.143
##
## Value 75 78 81
## Frequency 3 2 1
## Proportion 0.143 0.095 0.048
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 25 0 2
##
## Value II III
## Frequency 14 11
## Proportion 0.56 0.44
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 23 2 3
##
## Value left rectum right
## Frequency 8 1 14
## Proportion 0.348 0.043 0.609
## --------------------------------------------------------------------------------
a <- na.omit(clinical_p$Age)
mean(a)
## [1] 66.52381
# Subset of patients whose class label contains "Pm".
clinical_Pm <- filter(clinic1_bal, str_detect(class, "Pm"))

# Exploratory summary helper, re-declared for this subset: prints the
# structure, frequency tables and numeric profiles, and returns the
# describe() result for the data frame it receives.
basic_eda <- function(clinical_Pm) {
  glimpse(clinical_Pm)
  # df_Status(clinical_Pm)
  freq(clinical_Pm)
  profiling_num(clinical_Pm)
  plot_num(clinical_Pm)
  describe(clinical_Pm)
}

basic_eda(clinical_Pm)
## Rows: 30
## Columns: 6
## $ class <chr> "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", …
## $ organ <chr> "rectum", "colon", "rectum", "colon", "colon", "colon", "rec…
## $ Sex <chr> "f", "f", "f", "m", "f", "f", "m", "f", "f", "m", "m", "f", …
## $ Age <dbl> 58, 85, 62, 67, 75, 69, 58, 57, 72, 65, 61, 78, 81, 57, 74, …
## $ Stage <chr> "II", "II", "III", "III", "III", "II", "II", "III", "II", "I…
## $ sidedness <chr> "left", "right", "rectum", "rectum", "left", "left", "rectum…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 Pm 30 100 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 25 83.33 83.33
## 2 rectum 5 16.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 17 56.67 56.67
## 2 m 13 43.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 III 20 66.67 66.67
## 2 II 10 33.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 left 13 43.33 43.33
## 2 right 9 30.00 73.33
## 3 rectum 5 16.67 90.00
## 4 <NA> 3 10.00 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinical_Pm
##
## 6 Variables 30 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct value
## 30 0 1 Pm
##
## Value Pm
## Frequency 30
## Proportion 1
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 30 0 2
##
## Value colon rectum
## Frequency 25 5
## Proportion 0.833 0.167
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 30 0 2
##
## Value f m
## Frequency 17 13
## Proportion 0.567 0.433
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 26 4 18 0.997 68.77 13.91 53.75 56.50
## .25 .50 .75 .90 .95
## 58.75 69.00 77.25 85.00 85.75
##
## lowest : 37 53 56 57 58, highest: 78 81 85 86 87
##
## Value 37 53 56 57 58 61 62 65 67 69 72
## Frequency 1 1 1 2 2 1 1 1 1 3 2
## Proportion 0.038 0.038 0.038 0.077 0.077 0.038 0.038 0.038 0.038 0.115 0.077
##
## Value 74 75 78 81 85 86 87
## Frequency 2 1 1 2 2 1 1
## Proportion 0.077 0.038 0.038 0.077 0.077 0.038 0.038
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 30 0 2
##
## Value II III
## Frequency 10 20
## Proportion 0.333 0.667
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 27 3 3
##
## Value left rectum right
## Frequency 13 5 9
## Proportion 0.481 0.185 0.333
## --------------------------------------------------------------------------------
a <- na.omit(clinical_Pm$Age)
mean(a)
## [1] 68.76923
# Copy of the clinical table with every character column converted to a factor.
clinical_factor <- mutate_if(clinic1_bal, is.character, as.factor)
#clinical_factor <- na.omit(clinical_factor)

# Proportion of each organ within each class.
ggplot(clinical_factor, aes(x = class, fill = organ)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))

# Proportion of each sex within each class.
ggplot(clinical_factor, aes(x = class, fill = Sex)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))

# Sidedness proportions, after removing rows whose sidedness matches "rectum"
# (filter() also drops rows where the condition is NA).
clinical_factor1 <- filter(clinical_factor, !str_detect(sidedness, "rectum"))
ggplot(clinical_factor1, aes(x = class, fill = sidedness)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))

# Proportion of each stage within each class.
ggplot(clinical_factor, aes(x = class, fill = Stage)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4", "#000080"))
# Overlaid age histograms, one fill colour per class.
# labs(fill = "") has to be joined to the plot with `+`; written as a separate
# statement it only prints a labels object and the legend title is unchanged.
clinical_factor %>%
  ggplot(aes(x = Age, fill = class)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  labs(fill = "")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
# Contingency table of class by organ, tested with Fisher's exact test.
stat_data_organ <- table(clinical_factor$class,clinical_factor$organ)
# TODO: make a plot(...)
fisher.test(stat_data_organ)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_organ
## p-value = 0.2045
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4735918 236.0073881
## sample estimates:
## odds ratio
## 4.683156
stat_data_sex <- table(clinical_factor$class,clinical_factor$Sex)
fisher.test(stat_data_sex)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_sex
## p-value = 0.7899
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2504095 2.7417911
## sample estimates:
## odds ratio
## 0.8312827
stat_data_stage <- table(clinical_factor$class,clinical_factor$Stage)
fisher.test(stat_data_stage)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_stage
## p-value = 0.109
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.7476062 8.7792316
## sample estimates:
## odds ratio
## 2.500865
stat_data_side <- table(clinical_factor$class,clinical_factor$sidedness)
fisher.test(stat_data_side)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_side
## p-value = 0.1191
## alternative hypothesis: two.sided
hist(clinical_factor$Age[clinical_factor$class=="P"])
hist(clinical_factor$Age[clinical_factor$class=="Pm"])
tapply(clinical_factor$Age,clinical_factor$class, summary)
## $P
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 36.00 60.00 71.00 66.52 75.00 81.00 4
##
## $Pm
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 37.00 58.75 69.00 68.77 77.25 87.00 4
t.test(Age ~ class, clinical_factor)
##
## Welch Two Sample t-test
##
## data: Age by class
## t = -0.63093, df = 43.074, p-value = 0.5314
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.422239 4.931396
## sample estimates:
## mean in group P mean in group Pm
## 66.52381 68.76923
# Join the survival table with the clinical table on their row names, then
# fit survival curves stratified by Stage.
# Note: merge(..., by = "row.names") keeps only rows present in both inputs and
# stores the matched names in a `Row.names` column.
data <- merge(datasurv1, clinical, by="row.names")
fit <- survfit(Surv(time, Status) ~ Stage, data = data)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ Stage, data = data)
##
## n events median 0.95LCL 0.95UCL
## Stage=II 24 11 2963 2011 NA
## Stage=III 31 17 1157 797 NA
# Summary of survival curves
summary(fit)
## Call: survfit(formula = Surv(time, Status) ~ Stage, data = data)
##
## Stage=II
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 24 1 0.958 0.0408 0.882 1.000
## 204 22 1 0.915 0.0577 0.808 1.000
## 400 21 1 0.871 0.0695 0.745 1.000
## 1357 20 1 0.828 0.0785 0.687 0.997
## 1428 19 1 0.784 0.0856 0.633 0.971
## 1635 16 1 0.735 0.0932 0.573 0.942
## 1726 15 1 0.686 0.0990 0.517 0.910
## 2011 13 1 0.633 0.1045 0.458 0.875
## 2545 9 1 0.563 0.1142 0.378 0.838
## 2963 7 1 0.483 0.1230 0.293 0.795
## 3044 6 1 0.402 0.1261 0.218 0.743
##
## Stage=III
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 216 31 1 0.968 0.0317 0.908 1.000
## 386 30 1 0.935 0.0441 0.853 1.000
## 420 29 1 0.903 0.0531 0.805 1.000
## 500 28 1 0.871 0.0602 0.761 0.997
## 518 27 1 0.839 0.0661 0.719 0.979
## 520 26 1 0.806 0.0710 0.679 0.958
## 573 25 1 0.774 0.0751 0.640 0.936
## 632 24 1 0.742 0.0786 0.603 0.913
## 665 23 1 0.710 0.0815 0.567 0.889
## 682 22 1 0.677 0.0840 0.531 0.864
## 797 21 1 0.645 0.0859 0.497 0.838
## 807 20 1 0.613 0.0875 0.463 0.811
## 857 18 1 0.579 0.0890 0.428 0.782
## 1107 14 1 0.538 0.0917 0.385 0.751
## 1157 13 1 0.496 0.0935 0.343 0.718
## 1682 10 1 0.447 0.0965 0.292 0.682
## 1835 9 1 0.397 0.0977 0.245 0.643
# Access to the sort summary table
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## Stage=II 24 24 24 11 2774.640 310.5367 2963 2011
## Stage=III 31 31 31 17 2179.046 321.5120 1157 797
## 0.95UCL
## Stage=II NA
## Stage=III NA
ggsurvplot(fit, data = data, pval = TRUE)
# the log/rank test
surv.stage <- survdiff(Surv(time,Status) ~ Stage, data = data)
surv.stage
## Call:
## survdiff(formula = Surv(time, Status) ~ Stage, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Stage=II 24 11 14.9 1.01 2.27
## Stage=III 31 17 13.1 1.15 2.27
##
## Chisq= 2.3 on 1 degrees of freedom, p= 0.1
fit <- survfit(Surv(time, Status) ~ class, data = data)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ class, data = data)
##
## n events median 0.95LCL 0.95UCL
## class=P 25 6 3044 2963 NA
## class=Pm 30 22 1107 682 2011
# Summary of survival curves
summary(fit)
## Call: survfit(formula = Surv(time, Status) ~ class, data = data)
##
## class=P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 204 24 1 0.958 0.0408 0.8816 1
## 665 23 1 0.917 0.0564 0.8125 1
## 1726 15 1 0.856 0.0791 0.7137 1
## 2545 7 1 0.733 0.1319 0.5154 1
## 2963 4 1 0.550 0.1871 0.2824 1
## 3044 2 1 0.275 0.2158 0.0591 1
##
## class=Pm
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 30 1 0.967 0.0328 0.905 1.000
## 216 29 1 0.933 0.0455 0.848 1.000
## 386 28 1 0.900 0.0548 0.799 1.000
## 400 27 1 0.867 0.0621 0.753 0.997
## 420 26 1 0.833 0.0680 0.710 0.978
## 500 25 1 0.800 0.0730 0.669 0.957
## 518 24 1 0.767 0.0772 0.629 0.934
## 520 23 1 0.733 0.0807 0.591 0.910
## 573 22 1 0.700 0.0837 0.554 0.885
## 632 21 1 0.667 0.0861 0.518 0.859
## 682 20 1 0.633 0.0880 0.482 0.832
## 797 19 1 0.600 0.0894 0.448 0.804
## 807 18 1 0.567 0.0905 0.414 0.775
## 857 17 1 0.533 0.0911 0.382 0.745
## 1107 15 1 0.498 0.0917 0.347 0.714
## 1157 14 1 0.462 0.0918 0.313 0.682
## 1357 13 1 0.427 0.0913 0.280 0.649
## 1428 12 1 0.391 0.0904 0.249 0.615
## 1635 11 1 0.356 0.0889 0.218 0.580
## 1682 10 1 0.320 0.0868 0.188 0.545
## 1835 9 1 0.284 0.0841 0.159 0.508
## 2011 7 1 0.244 0.0813 0.127 0.469
# Access to the sort summary table
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## class=P 25 25 25 6 2986.683 324.5666 3044 2963
## class=Pm 30 30 30 22 1716.567 276.5550 1107 682
## 0.95UCL
## class=P NA
## class=Pm 2011
ggsurvplot(fit, data = data, pval = TRUE)
# the log-rank test, comparing survival between the two classes
# NOTE(review): the result is stored in `surv.stage` although the grouping
# variable is `class`; this replaces the Stage result kept under that name.
surv.stage <- survdiff(Surv(time,Status) ~ class, data = data)
surv.stage
## Call:
## survdiff(formula = Surv(time, Status) ~ class, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## class=P 25 6 14.5 4.96 10.5
## class=Pm 30 22 13.5 5.30 10.5
##
## Chisq= 10.5 on 1 degrees of freedom, p= 0.001
# Exclude rectal tumours before comparing left- vs right-sided disease.
# A logical mask is used rather than -which(...): when no row matches, which()
# returns integer(0) and negative indexing with it would select zero rows.
# Rows with missing sidedness are still kept here; survfit() reports them as
# deleted due to missingness.
newdata <- data[!(data$sidedness %in% "rectum"), ]
fit <- survfit(Surv(time, Status) ~ sidedness, data = newdata)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## 5 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## sidedness=left 21 9 3044 1428 NA
## sidedness=right 23 12 2011 1157 NA
# Summary of survival curves
summary(fit)
## Call: survfit(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## 5 observations deleted due to missingness
## sidedness=left
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 400 21 1 0.952 0.0465 0.866 1.000
## 520 20 1 0.905 0.0641 0.788 1.000
## 632 19 1 0.857 0.0764 0.720 1.000
## 682 18 1 0.810 0.0857 0.658 0.996
## 807 17 1 0.762 0.0929 0.600 0.968
## 1107 14 1 0.707 0.1010 0.535 0.936
## 1428 12 1 0.649 0.1084 0.467 0.900
## 1835 11 1 0.590 0.1135 0.404 0.860
## 3044 4 1 0.442 0.1534 0.224 0.873
##
## sidedness=right
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 23 1 0.957 0.0425 0.877 1.000
## 204 21 1 0.911 0.0601 0.800 1.000
## 420 20 1 0.865 0.0723 0.735 1.000
## 500 19 1 0.820 0.0816 0.675 0.997
## 665 18 1 0.774 0.0889 0.618 0.970
## 797 17 1 0.729 0.0946 0.565 0.940
## 857 15 1 0.680 0.1000 0.510 0.907
## 1157 13 1 0.628 0.1051 0.452 0.872
## 1635 10 1 0.565 0.1118 0.383 0.833
## 1726 9 1 0.502 0.1157 0.320 0.789
## 2011 8 1 0.440 0.1170 0.261 0.741
## 2545 4 1 0.330 0.1294 0.153 0.712
# Access to the sort summary table
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## sidedness=left 21 21 21 9 2701.025 365.2286 3044 1428
## sidedness=right 23 23 23 12 2227.222 362.8170 2011 1157
## 0.95UCL
## sidedness=left NA
## sidedness=right NA
ggsurvplot(fit, data = newdata, pval = TRUE)
# the log-rank test, comparing survival between left- and right-sided tumours
# NOTE(review): the result is stored in `surv.stage` although the grouping
# variable is `sidedness`; this replaces the value previously kept there.
surv.stage <- survdiff(Surv(time,Status) ~ sidedness, data = newdata)
surv.stage
## Call:
## survdiff(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## n=44, 5 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sidedness=left 21 9 11.06 0.382 0.812
## sidedness=right 23 12 9.94 0.425 0.812
##
## Chisq= 0.8 on 1 degrees of freedom, p= 0.4
# Prepare the inputs for differential expression: order the expression table
# and the sample annotation by row name, keep only the samples present in
# both, drop constant features and transpose so that features are in rows.
xdata <- rnaseq1[order(row.names(rnaseq1)), ]
rownames(DATASET1_bal) <- DATASET1_bal$ID
ydata <- as.data.frame(DATASET1_bal[order(row.names(DATASET1_bal)), ])
rownames(ydata) <- ydata$ID
xdata <- xdata[rownames(xdata) %in% rownames(ydata), ]
ydata <- as.data.frame(ydata[rownames(ydata) %in% rownames(xdata), ])
# keep features with standard deviation > 0
# seq_len() is safe when there are zero columns (seq(0) would give c(1, 0)),
# and vapply() guarantees a numeric vector whatever the input size.
feature_sd <- vapply(
  seq_len(ncol(xdata)),
  function(ix) sd(xdata[, ix]),
  numeric(1)
)
xdata <- xdata[, feature_sd != 0]
xdata <- t(xdata)
group <- as.factor(ydata$class)
# NOTE(review): this variable shadows base::class(); it is kept because code
# outside this block may read it.
class <- as.data.frame(ydata$class)
# Differential expression between the levels of `group` with edgeR:
# build the DGEList, filter low-count genes, estimate dispersions, fit the
# negative binomial GLM and run a likelihood-ratio test.
# NOTE(review): no calcNormFactors() call appears between DGEList() and
# estimateDisp(), so normalization factors stay at their defaults -- confirm
# this is intended.
edgeR.DGElist <- DGEList(counts=xdata, group = group)
# remove genes that do not have one count per million in at least 5 samples
keep <- rowSums(cpm(edgeR.DGElist) >= 1) >= 5
edgeR.DGElist <- edgeR.DGElist[keep ,]
# specify the design setup (intercept plus one coefficient for `group`)
design <- model.matrix(~group)
# estimate the dispersion for all read counts across all samples
edgeR.DGElist <- estimateDisp(edgeR.DGElist, design)
# fit the negative binomial model
edger_fit <- glmFit(edgeR.DGElist, design )
# perform the testing for every gene using the neg. binomial model;
# with no `coef` given, glmLRT() tests the last column of the design matrix
# (reported as groupPm in the output below)
edger_lrt <- glmLRT(edger_fit)
summary(decideTests(edger_lrt))
## groupPm
## Down 1005
## NotSig 18381
## Up 835
# extract results from edger_lrt$table plus adjusted p-values
DGE.results_edgeR <- topTags(edger_lrt, n = Inf , sort.by = "PValue" , adjust.method = "BH" )
topTags(DGE.results_edgeR) #table with the top10 DEGs
## Coefficient: groupPm
## logFC logCPM LR PValue FDR
## CHGA -8.017340 6.906952 62.54124 2.609236e-15 5.276137e-11
## TPH1 -6.062430 4.908484 46.40563 9.613905e-12 9.720139e-08
## VWA5B2 -4.644305 1.961363 37.35393 9.852224e-10 6.640728e-06
## STXBP5L -5.284757 2.612217 35.97941 1.994132e-09 1.008084e-05
## EGFR -3.291552 6.398804 34.90765 3.457204e-09 1.398162e-05
## IDO1 -3.202438 2.789425 33.54228 6.973165e-09 2.350073e-05
## PEG3 -4.262645 2.669096 33.23490 8.167131e-09 2.359251e-05
## MIR3978 4.038345 -1.040147 32.55673 1.157634e-08 2.926066e-05
## NLRP2 -2.397736 2.333213 32.04905 1.503287e-08 3.377553e-05
## PTPRN -5.380491 3.370588 31.74708 1.756133e-08 3.551077e-05
# Select the differentially expressed genes (FDR < 0.05), export them, and
# keep the names of the first 100 overall, up-regulated and down-regulated
# genes. head() is used instead of df[1:100, ] so that fewer than 100 rows do
# not produce padded "NA" row names.
genes_deg <- DGE.results_edgeR$table
genes_deg <- genes_deg[which(genes_deg$FDR < 0.05),]
dim(genes_deg)#genes found to be differentially expressed
## [1] 1840 5
genes_deg$row <- row.names(genes_deg)
write_xlsx(genes_deg,"genes_deg_d2.xlsx")
top100_deg <- head(rownames(genes_deg), 100)
# up-regulated genes (logFC > 0 for the tested coefficient)
high <- genes_deg[genes_deg$logFC > 0,]
dim(high)
## [1] 835 6
top100_high <- head(rownames(high), 100)
# down-regulated genes (logFC < 0 for the tested coefficient)
low <- genes_deg[genes_deg$logFC < 0,]
dim(low)
## [1] 1005 6
top100_low <- head(rownames(low), 100)
# Build the feature table (xdata) and a numeric 0/1 label table (ydata) with
# matching, identically ordered rows.
# NOTE(review): no active assignment to xdataT is visible here (only in
# commented-out code); presumably it is restored by a load() -- confirm.
xdata.raw <- xdataT
# keep features with standard deviation > 0
xdata <- xdata.raw[,sapply(seq(ncol(xdata.raw)), function(ix) {sd(xdata.raw[,ix])}) != 0]
# as.data.frame() on the bare vector names the column literally `clinic1$class`,
# which is why the statements below refer to it with backticks.
ydata.raw <- as.data.frame(clinic1$class)
# NOTE(review): assumes the rows of clinic1 and DATASET1 are in the same order.
ydata.raw$row <- DATASET1$ID
# ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the labels: "P" -> 0, "Pm" -> 1 (converted to numeric at the end).
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep only the samples present in both tables.
xdata <- xdata[rownames(xdata) %in%
rownames(ydata.raw),]
ydata.raw <- as.data.frame(ydata.raw[rownames(ydata.raw) %in%
rownames(xdata),])
# Sort both tables by row name so that row i refers to the same sample in each.
xdata <- xdata[ order(row.names(xdata)), ]
ydata.raw <- ydata.raw[ order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[,1:2])
ydata$`clinic1$class` <- as.numeric(ydata$`clinic1$class`)
# Five classifiers were used: decision trees, linear and radial support vector
# machines, logistic regression, and random forest.
# Restrict the features to the first 50 entries of top100_deg and replace the
# gene names with placeholder names Var1..VarN; the original names are kept in
# `nomesgenes` ("gene names"). head() and seq_len(ncol()) replace the two
# hard-coded 1:50 ranges so that a shorter gene list does not introduce NA
# indices or a column-name length mismatch.
xdata <- xdataT[, head(top100_deg, 50)]
nomesgenes <- colnames(xdata)
colnames(xdata) <- paste0("Var", seq_len(ncol(xdata)))
colnames(ydata) <- c("class","row")
# Attach the class label as a factor column named `type`.
xdata$type <- as.factor(ydata$class)
#xdata <- xdata[colMeans(xdata == 0) <= 0.6] #delete genes that have null values in at least 60% of the samples
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
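# acc: mean / median / sd of each row of acc_train
# (row 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf)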
acc_trees <- acc_train[1,]
hist(acc_trees)
mean(acc_trees)
## [1] 0.9844737
median(acc_trees)
## [1] 0.9736842
sd(acc_trees)
## [1] 0.01634458
acc_svm <- acc_train[2,]
hist(acc_svm)
mean(acc_svm)
## [1] 0.9318421
median(acc_svm)
## [1] 0.9473684
sd(acc_svm)
## [1] 0.03852665
acc_svmR <- acc_train[3,]
hist(acc_svmR)
mean(acc_svmR)
## [1] 0.9344737
median(acc_svmR)
## [1] 0.9736842
sd(acc_svmR)
## [1] 0.1094652
acc_logs <- acc_train[4,]
hist(acc_logs)
mean(acc_logs)
## [1] 1
median(acc_logs)
## [1] 1
sd(acc_logs)
## [1] 0
acc_rf <- acc_train[5,]
hist(acc_rf)
mean(acc_rf)
## [1] 1
median(acc_rf)
## [1] 1
sd(acc_rf)
## [1] 0
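# auc: mean / median / sd of each row of auc_train
# (row 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf)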
auc_trees <- auc_train[1,]
mean(auc_trees)
## [1] 0.9830392
median(auc_trees)
## [1] 0.9761905
sd(auc_trees)
## [1] 0.0177827
auc_svm <- auc_train[2,]
mean(auc_svm)
## [1] 0.9295938
median(auc_svm)
## [1] 0.9439776
sd(auc_svm)
## [1] 0.04390672
auc_svmR <- auc_train[3,]
mean(auc_svmR)
## [1] 0.9308543
median(auc_svmR)
## [1] 0.9733894
sd(auc_svmR)
## [1] 0.122283
auc_logs <- auc_train[4,]
mean(auc_logs)
## [1] 1
median(auc_logs)
## [1] 1
sd(auc_logs)
## [1] 0
auc_rf <- auc_train[5,]
mean(auc_rf)
## [1] 1
median(auc_rf)
## [1] 1
sd(auc_rf)
## [1] 0
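# miscl: mean / median / sd of each row of miscl_train
# (row 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf)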
miscl_trees <- miscl_train[1,]
mean(miscl_trees)
## [1] 0.59
median(miscl_trees)
## [1] 1
sd(miscl_trees)
## [1] 0.6210939
miscl_svm <- miscl_train[2,]
mean(miscl_svm)
## [1] 2.59
median(miscl_svm)
## [1] 2
sd(miscl_svm)
## [1] 1.464013
miscl_svmR <- miscl_train[3,]
mean(miscl_svmR)
## [1] 2.49
median(miscl_svmR)
## [1] 1
sd(miscl_svmR)
## [1] 4.159679
miscl_logs <- miscl_train[4,]
mean(miscl_logs)
## [1] 0
median(miscl_logs)
## [1] 0
sd(miscl_logs)
## [1] 0
miscl_rf <- miscl_train[5,]
mean(miscl_rf)
## [1] 0
median(miscl_rf)
## [1] 0
sd(miscl_rf)
## [1] 0
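# sensitivity: mean / median / sd of each row of sensitivity_train
# (row 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf)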
sensitivity_trees <- sensitivity_train[1,]
mean(sensitivity_trees)
## [1] 0.9694118
median(sensitivity_trees)
## [1] 1
sd(sensitivity_trees)
## [1] 0.03394119
sensitivity_svm <- sensitivity_train[2,]
mean(sensitivity_svm)
## [1] 0.9082353
median(sensitivity_svm)
## [1] 0.9411765
sd(sensitivity_svm)
## [1] 0.109753
sensitivity_svmR <- sensitivity_train[3,]
mean(sensitivity_svmR)
## [1] 0.8964706
median(sensitivity_svmR)
## [1] 1
sd(sensitivity_svmR)
## [1] 0.2487422
sensitivity_logs <- sensitivity_train[4,]
mean(sensitivity_logs)
## [1] 1
median(sensitivity_logs)
## [1] 1
sd(sensitivity_logs)
## [1] 0
sensitivity_rf <- sensitivity_train[5,]
mean(sensitivity_rf)
## [1] 1
median(sensitivity_rf)
## [1] 1
sd(sensitivity_rf)
## [1] 0
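# specificity: mean / median / sd of each row of specificity_train
# (row 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf)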
specificity_trees <- specificity_train[1,]
mean(specificity_trees)
## [1] 0.9966667
median(specificity_trees)
## [1] 1
sd(specificity_trees)
## [1] 0.01221107
specificity_svm <- specificity_train[2,]
mean(specificity_svm)
## [1] 0.9509524
median(specificity_svm)
## [1] 0.952381
sd(specificity_svm)
## [1] 0.04994557
specificity_svmR <- specificity_train[3,]
mean(specificity_svmR)
## [1] 0.9652381
median(specificity_svmR)
## [1] 0.9761905
sd(specificity_svmR)
## [1] 0.04326141
specificity_logs <- specificity_train[4,]
mean(specificity_logs)
## [1] 1
median(specificity_logs)
## [1] 1
sd(specificity_logs)
## [1] 0
specificity_rf <- specificity_train[5,]
mean(specificity_rf)
## [1] 1
median(specificity_rf)
## [1] 1
sd(specificity_rf)
## [1] 0
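# fneg: mean / median / sd of each row of fneg_train
# (row 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf)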
fneg_trees <- fneg_train[1,]
mean(fneg_trees)
## [1] 0.52
median(fneg_trees)
## [1] 0
sd(fneg_trees)
## [1] 0.5770003
fneg_svm <- fneg_train[2,]
mean(fneg_svm)
## [1] 1.56
median(fneg_svm)
## [1] 1
sd(fneg_svm)
## [1] 1.865801
fneg_svmR <- fneg_train[3,]
mean(fneg_svmR)
## [1] 1.76
median(fneg_svmR)
## [1] 0
sd(fneg_svmR)
## [1] 4.228618
fneg_logs <- fneg_train[4,]
mean(fneg_logs)
## [1] 0
median(fneg_logs)
## [1] 0
sd(fneg_logs)
## [1] 0
fneg_rf <- fneg_train[5,]
mean(fneg_rf)
## [1] 0
median(fneg_rf)
## [1] 0
sd(fneg_rf)
## [1] 0
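# acc: the same summaries for `acc` (no _train suffix); this reassigns the
# acc_* variables that were set from acc_train earlier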
acc_trees <- acc[1,]
hist(acc_trees)
mean(acc_trees)
## [1] 0.6629412
median(acc_trees)
## [1] 0.6470588
sd(acc_trees)
## [1] 0.10269
acc_svm <- acc[2,]
hist(acc_svm)
mean(acc_svm)
## [1] 0.7264706
median(acc_svm)
## [1] 0.7058824
sd(acc_svm)
## [1] 0.09211134
acc_svmR <- acc[3,]
hist(acc_svmR)
mean(acc_svmR)
## [1] 0.62
median(acc_svmR)
## [1] 0.5882353
sd(acc_svmR)
## [1] 0.1123029
acc_logs <- acc[4,]
hist(acc_logs)
mean(acc_logs)
## [1] 0.6564706
median(acc_logs)
## [1] 0.6470588
sd(acc_logs)
## [1] 0.08473757
acc_rf <- acc[5,]
hist(acc_rf)
mean(acc_rf)
## [1] 0.7288235
median(acc_rf)
## [1] 0.7058824
sd(acc_rf)
## [1] 0.08961878
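# auc: the same summaries for `auc` (no _train suffix); this reassigns the
# auc_* variables that were set from auc_train earlier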
auc_trees <- auc[1,]
mean(auc_trees)
## [1] 0.6609028
median(auc_trees)
## [1] 0.6527778
sd(auc_trees)
## [1] 0.1016463
auc_svm <- auc[2,]
mean(auc_svm)
## [1] 0.7238889
median(auc_svm)
## [1] 0.7083333
sd(auc_svm)
## [1] 0.08707307
auc_svmR <- auc[3,]
mean(auc_svmR)
## [1] 0.6244444
median(auc_svmR)
## [1] 0.6145833
sd(auc_svmR)
## [1] 0.1137441
auc_logs <- auc[4,]
mean(auc_logs)
## [1] 0.6536111
median(auc_logs)
## [1] 0.6388889
sd(auc_logs)
## [1] 0.08192007
auc_rf <- auc[5,]
mean(auc_rf)
## [1] 0.7268056
median(auc_rf)
## [1] 0.7083333
sd(auc_rf)
## [1] 0.09063707
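# miscl: the same summaries for `miscl` (no _train suffix); this reassigns the
# miscl_* variables that were set from miscl_train earlier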
miscl_trees <- miscl[1,]
mean(miscl_trees)
## [1] 5.73
median(miscl_trees)
## [1] 6
sd(miscl_trees)
## [1] 1.745731
miscl_svm <- miscl[2,]
mean(miscl_svm)
## [1] 4.65
median(miscl_svm)
## [1] 5
sd(miscl_svm)
## [1] 1.565893
miscl_svmR <- miscl[3,]
mean(miscl_svmR)
## [1] 6.46
median(miscl_svmR)
## [1] 7
sd(miscl_svmR)
## [1] 1.909149
miscl_logs <- miscl[4,]
mean(miscl_logs)
## [1] 5.84
median(miscl_logs)
## [1] 6
sd(miscl_logs)
## [1] 1.440539
miscl_rf <- miscl[5,]
mean(miscl_rf)
## [1] 4.61
median(miscl_rf)
## [1] 5
sd(miscl_rf)
## [1] 1.523519
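# sensitivity: the same summaries for `sensitivity` (no _train suffix); this
# reassigns the sensitivity_* variables that were set from sensitivity_train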
sensitivity_trees <- sensitivity[1,]
mean(sensitivity_trees)
## [1] 0.59625
median(sensitivity_trees)
## [1] 0.625
sd(sensitivity_trees)
## [1] 0.1862161
sensitivity_svm <- sensitivity[2,]
mean(sensitivity_svm)
## [1] 0.6275
median(sensitivity_svm)
## [1] 0.625
sd(sensitivity_svm)
## [1] 0.1986221
sensitivity_svmR <- sensitivity[3,]
mean(sensitivity_svmR)
## [1] 0.625
median(sensitivity_svmR)
## [1] 0.75
sd(sensitivity_svmR)
## [1] 0.3207866
sensitivity_logs <- sensitivity[4,]
mean(sensitivity_logs)
## [1] 0.565
median(sensitivity_logs)
## [1] 0.625
sd(sensitivity_logs)
## [1] 0.1623299
sensitivity_rf <- sensitivity[5,]
mean(sensitivity_rf)
## [1] 0.6925
median(sensitivity_rf)
## [1] 0.75
sd(sensitivity_rf)
## [1] 0.1661165
# Specificity summaries: one row of `specificity` per model (rows 1-5 ->
# trees, svm, svmR, logs, rf), reported as mean / median / sd across columns.
# "## [1]" lines are recorded output.
specificity_trees <- specificity[1,]
mean(specificity_trees)
## [1] 0.7222222
median(specificity_trees)
## [1] 0.7777778
sd(specificity_trees)
## [1] 0.1468801
specificity_svm <- specificity[2,]
mean(specificity_svm)
## [1] 0.8144444
median(specificity_svm)
## [1] 0.7777778
sd(specificity_svm)
## [1] 0.151523
specificity_svmR <- specificity[3,]
mean(specificity_svmR)
## [1] 0.6155556
median(specificity_svmR)
## [1] 0.5555556
sd(specificity_svmR)
## [1] 0.2224578
specificity_logs <- specificity[4,]
mean(specificity_logs)
## [1] 0.7377778
median(specificity_logs)
## [1] 0.7777778
sd(specificity_logs)
## [1] 0.131677
specificity_rf <- specificity[5,]
mean(specificity_rf)
## [1] 0.7611111
median(specificity_rf)
## [1] 0.7777778
sd(specificity_rf)
## [1] 0.1398117
# Summaries of `fneg`, one row per model (rows 1-5 -> trees, svm, svmR, logs,
# rf): mean / median / sd across columns.
# NOTE(review): presumably the false-negative count per run — confirm against
# the code that fills `fneg`.
fneg_trees <- fneg[1,]
mean(fneg_trees)
## [1] 3.23
median(fneg_trees)
## [1] 3
sd(fneg_trees)
## [1] 1.489729
fneg_svm <- fneg[2,]
mean(fneg_svm)
## [1] 2.98
median(fneg_svm)
## [1] 3
sd(fneg_svm)
## [1] 1.588977
fneg_svmR <- fneg[3,]
mean(fneg_svmR)
## [1] 3
median(fneg_svmR)
## [1] 2
sd(fneg_svmR)
## [1] 2.566293
fneg_logs <- fneg[4,]
mean(fneg_logs)
## [1] 3.48
median(fneg_logs)
## [1] 3
sd(fneg_logs)
## [1] 1.29864
fneg_rf <- fneg[5,]
mean(fneg_rf)
## [1] 2.46
median(fneg_rf)
## [1] 2
sd(fneg_rf)
## [1] 1.328932
# Build the response table from clinic1$class (recoded "P" -> 0, "Pm" -> 1),
# key it by DATASET1$ID, and align it row-for-row with the expression data:
# both tables are restricted to the shared sample IDs and sorted by ID.
xdata <- xdataT
ydata.raw <- as.data.frame(clinic1$class)
rownames(ydata.raw) <- DATASET1$ID
ydata.raw[["row"]] <- DATASET1$ID
# (an earlier version hard-coded the labels: c(rep(0,28),rep(1,34)))
ydata.raw[["clinic1$class"]] <- replace(
  ydata.raw[["clinic1$class"]],
  ydata.raw[["clinic1$class"]] == "P",
  0
)
ydata.raw[["clinic1$class"]] <- replace(
  ydata.raw[["clinic1$class"]],
  ydata.raw[["clinic1$class"]] == "Pm",
  1
)
# keep only the samples present on both sides
xdata <- xdata[is.element(rownames(xdata), rownames(ydata.raw)), ]
ydata.raw <- as.data.frame(
  ydata.raw[is.element(rownames(ydata.raw), rownames(xdata)), ]
)
# same ordering (by sample ID) for predictors and response
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw)
ydata[["clinic1$class"]] <- as.numeric(ydata[["clinic1$class"]])
## [1] 0
## [1] 1
## [1] 1011
## [1] "data"
## [1] "EN"
## [1] 1
## [1] 2
## [1] 1022
## [1] "data"
## [1] "EN"
## [1] 2
## [1] 3
## [1] 1033
## [1] "data"
## [1] "EN"
## [1] 3
## [1] 4
## [1] 1044
## [1] "data"
## [1] "EN"
## [1] 4
## [1] 5
## [1] 1055
## [1] "data"
## [1] "EN"
## [1] 5
## [1] 6
## [1] 1066
## [1] "data"
## [1] "EN"
## [1] 6
## [1] 7
## [1] 1077
## [1] "data"
## [1] "EN"
## [1] 7
## [1] 8
## [1] 1088
## [1] "data"
## [1] "EN"
## [1] 8
## [1] 9
## [1] 1099
## [1] "data"
## [1] "EN"
## [1] 9
## [1] 10
## [1] 1110
## [1] "data"
## [1] "EN"
## [1] 10
## [1] 11
## [1] 1121
## [1] "data"
## [1] "EN"
## [1] 11
## [1] 12
## [1] 1132
## [1] "data"
## [1] "EN"
## [1] 12
## [1] 13
## [1] 1143
## [1] "data"
## [1] "EN"
## [1] 13
## [1] 14
## [1] 1154
## [1] "data"
## [1] "EN"
## [1] 14
## [1] 15
## [1] 1165
## [1] "data"
## [1] "EN"
## [1] 15
## [1] 16
## [1] 1176
## [1] "data"
## [1] "EN"
## [1] 16
## [1] 17
## [1] 1187
## [1] "data"
## [1] "EN"
## [1] 17
## [1] 18
## [1] 1198
## [1] "data"
## [1] "EN"
## [1] 18
## [1] 19
## [1] 1209
## [1] "data"
## [1] "EN"
## [1] 19
## [1] 20
## [1] 1220
## [1] "data"
## [1] "EN"
## [1] 20
## [1] 21
## [1] 1231
## [1] "data"
## [1] "EN"
## [1] 21
## [1] 22
## [1] 1242
## [1] "data"
## [1] "EN"
## [1] 22
## [1] 23
## [1] 1253
## [1] "data"
## [1] "EN"
## [1] 23
## [1] 24
## [1] 1264
## [1] "data"
## [1] "EN"
## [1] 24
## [1] 25
## [1] 1275
## [1] "data"
## [1] "EN"
## [1] 25
## [1] 26
## [1] 1286
## [1] "data"
## [1] "EN"
## [1] 26
## [1] 27
## [1] 1297
## [1] "data"
## [1] "EN"
## [1] 27
## [1] 28
## [1] 1308
## [1] "data"
## [1] "EN"
## [1] 28
## [1] 29
## [1] 1319
## [1] "data"
## [1] "EN"
## [1] 29
## [1] 30
## [1] 1330
## [1] "data"
## [1] "EN"
## [1] 30
## [1] 31
## [1] 1341
## [1] "data"
## [1] "EN"
## [1] 31
## [1] 32
## [1] 1352
## [1] "data"
## [1] "EN"
## [1] 32
## [1] 33
## [1] 1363
## [1] "data"
## [1] "EN"
## [1] 33
## [1] 34
## [1] 1374
## [1] "data"
## [1] "EN"
## [1] 34
## [1] 35
## [1] 1385
## [1] "data"
## [1] "EN"
## [1] 35
## [1] 36
## [1] 1396
## [1] "data"
## [1] "EN"
## [1] 36
## [1] 37
## [1] 1407
## [1] "data"
## [1] "EN"
## [1] 37
## [1] 38
## [1] 1418
## [1] "data"
## [1] "EN"
## [1] 38
## [1] 39
## [1] 1429
## [1] "data"
## [1] "EN"
## [1] 39
## [1] 40
## [1] 1440
## [1] "data"
## [1] "EN"
## [1] 40
## [1] 41
## [1] 1451
## [1] "data"
## [1] "EN"
## [1] 41
## [1] 42
## [1] 1462
## [1] "data"
## [1] "EN"
## [1] 42
## [1] 43
## [1] 1473
## [1] "data"
## [1] "EN"
## [1] 43
## [1] 44
## [1] 1484
## [1] "data"
## [1] "EN"
## [1] 44
## [1] 45
## [1] 1495
## [1] "data"
## [1] "EN"
## [1] 45
## [1] 46
## [1] 1506
## [1] "data"
## [1] "EN"
## [1] 46
## [1] 47
## [1] 1517
## [1] "data"
## [1] "EN"
## [1] 47
## [1] 48
## [1] 1528
## [1] "data"
## [1] "EN"
## [1] 48
## [1] 49
## [1] 1539
## [1] "data"
## [1] "EN"
## [1] 49
## [1] 50
## [1] 1550
## [1] "data"
## [1] "EN"
## [1] 50
## [1] 51
## [1] 1561
## [1] "data"
## [1] "EN"
## [1] 51
## [1] 52
## [1] 1572
## [1] "data"
## [1] "EN"
## [1] 52
## [1] 53
## [1] 1583
## [1] "data"
## [1] "EN"
## [1] 53
## [1] 54
## [1] 1594
## [1] "data"
## [1] "EN"
## [1] 54
## [1] 55
## [1] 1605
## [1] "data"
## [1] "en train ups"
## [1] "EN"
## [1] 54
## [1] 56
## [1] 1616
## [1] "data"
## [1] "EN"
## [1] 55
## [1] 57
## [1] 1627
## [1] "data"
## [1] "EN"
## [1] 56
## [1] 58
## [1] 1638
## [1] "data"
## [1] "EN"
## [1] 57
## [1] 59
## [1] 1649
## [1] "data"
## [1] "EN"
## [1] 58
## [1] 60
## [1] 1660
## [1] "data"
## [1] "EN"
## [1] 59
## [1] 61
## [1] 1671
## [1] "data"
## [1] "EN"
## [1] 60
## [1] 62
## [1] 1682
## [1] "data"
## [1] "EN"
## [1] 61
## [1] 63
## [1] 1693
## [1] "data"
## [1] "EN"
## [1] 62
## [1] 64
## [1] 1704
## [1] "data"
## [1] "EN"
## [1] 63
## [1] 65
## [1] 1715
## [1] "data"
## [1] "EN"
## [1] 64
## [1] 66
## [1] 1726
## [1] "data"
## [1] "EN"
## [1] 65
## [1] 67
## [1] 1737
## [1] "data"
## [1] "EN"
## [1] 66
## [1] 68
## [1] 1748
## [1] "data"
## [1] "EN"
## [1] 67
## [1] 69
## [1] 1759
## [1] "data"
## [1] "EN"
## [1] 68
## [1] 70
## [1] 1770
## [1] "data"
## [1] "en train ups"
## [1] "en pred ups"
## [1] "EN"
## [1] 68
## [1] 71
## [1] 1781
## [1] "data"
## [1] "EN"
## [1] 69
## [1] 72
## [1] 1792
## [1] "data"
## [1] "EN"
## [1] 70
## [1] 73
## [1] 1803
## [1] "data"
## [1] "EN"
## [1] 71
## [1] 74
## [1] 1814
## [1] "data"
## [1] "EN"
## [1] 72
## [1] 75
## [1] 1825
## [1] "data"
## [1] "EN"
## [1] 73
## [1] 76
## [1] 1836
## [1] "data"
## [1] "EN"
## [1] 74
## [1] 77
## [1] 1847
## [1] "data"
## [1] "EN"
## [1] 75
## [1] 78
## [1] 1858
## [1] "data"
## [1] "EN"
## [1] 76
## [1] 79
## [1] 1869
## [1] "data"
## [1] "EN"
## [1] 77
## [1] 80
## [1] 1880
## [1] "data"
## [1] "EN"
## [1] 78
## [1] 81
## [1] 1891
## [1] "data"
## [1] "EN"
## [1] 79
## [1] 82
## [1] 1902
## [1] "data"
## [1] "EN"
## [1] 80
## [1] 83
## [1] 1913
## [1] "data"
## [1] "EN"
## [1] 81
## [1] 84
## [1] 1924
## [1] "data"
## [1] "EN"
## [1] 82
## [1] 85
## [1] 1935
## [1] "data"
## [1] "EN"
## [1] 83
## [1] 86
## [1] 1946
## [1] "data"
## [1] "EN"
## [1] 84
## [1] 87
## [1] 1957
## [1] "data"
## [1] "EN"
## [1] 85
## [1] 88
## [1] 1968
## [1] "data"
## [1] "EN"
## [1] 86
## [1] 89
## [1] 1979
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 86
## [1] 90
## [1] 1990
## [1] "data"
## [1] "EN"
## [1] 87
## [1] 91
## [1] 2001
## [1] "data"
## [1] "EN"
## [1] 88
## [1] 92
## [1] 2012
## [1] "data"
## [1] "EN"
## [1] 89
## [1] 93
## [1] 2023
## [1] "data"
## [1] "EN"
## [1] 90
## [1] 94
## [1] 2034
## [1] "data"
## [1] "EN"
## [1] 91
## [1] 95
## [1] 2045
## [1] "data"
## [1] "EN"
## [1] 92
## [1] 96
## [1] 2056
## [1] "data"
## [1] "EN"
## [1] 93
## [1] 97
## [1] 2067
## [1] "data"
## [1] "EN"
## [1] 94
## [1] 98
## [1] 2078
## [1] "data"
## [1] "EN"
## [1] 95
## [1] 99
## [1] 2089
## [1] "data"
## [1] "EN"
## [1] 96
## [1] 100
## [1] 2100
## [1] "data"
## [1] "EN"
## [1] 97
## [1] 101
## [1] 2111
## [1] "data"
## [1] "EN"
## [1] 98
## [1] 102
## [1] 2122
## [1] "data"
## [1] "en train ups"
## [1] "en pred ups"
## [1] "EN"
## [1] 98
## [1] 103
## [1] 2133
## [1] "data"
## [1] "EN"
## [1] 99
## [1] 104
## [1] 2144
## [1] "data"
## [1] "en train ups"
## [1] "EN"
## [1] 99
## [1] 105
## [1] 2155
## [1] "data"
## [1] "EN"
## [1] 1
## [1] 0
## [1] 2166
## [1] "data"
## [1] "iTwiner"
## [1] 2
## [1] 1
## [1] 2177
## [1] "data"
## [1] "iTwiner"
## [1] 3
## [1] 2
## [1] 2188
## [1] "data"
## [1] "iTwiner"
## [1] 4
## [1] 3
## [1] 2199
## [1] "data"
## [1] "iTwiner"
## [1] 5
## [1] 4
## [1] 2210
## [1] "data"
## [1] "iTwiner"
## [1] 6
## [1] 5
## [1] 2221
## [1] "data"
## [1] "iTwiner"
## [1] 7
## [1] 6
## [1] 2232
## [1] "data"
## [1] "iTwiner"
## [1] 8
## [1] 7
## [1] 2243
## [1] "data"
## [1] "iTwiner"
## [1] 9
## [1] 8
## [1] 2254
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 10
## [1] 8
## [1] 2265
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 11
## [1] 8
## [1] 2276
## [1] "data"
## [1] "iTwiner"
## [1] 12
## [1] 9
## [1] 2287
## [1] "data"
## [1] "iTwiner"
## [1] 13
## [1] 10
## [1] 2298
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 14
## [1] 10
## [1] 2309
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 15
## [1] 10
## [1] 2320
## [1] "data"
## [1] "iTwiner"
## [1] 16
## [1] 11
## [1] 2331
## [1] "data"
## [1] "iTwiner"
## [1] 17
## [1] 12
## [1] 2342
## [1] "data"
## [1] "iTwiner"
## [1] 18
## [1] 13
## [1] 2353
## [1] "data"
## [1] "iTwiner"
## [1] 19
## [1] 14
## [1] 2364
## [1] "data"
## [1] "iTwiner"
## [1] 20
## [1] 15
## [1] 2375
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 21
## [1] 15
## [1] 2386
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 22
## [1] 15
## [1] 2397
## [1] "data"
## [1] "iTwiner"
## [1] 23
## [1] 16
## [1] 2408
## [1] "data"
## [1] "iTwiner"
## [1] 24
## [1] 17
## [1] 2419
## [1] "data"
## [1] "iTwiner"
## [1] 25
## [1] 18
## [1] 2430
## [1] "data"
## [1] "iTwiner"
## [1] 26
## [1] 19
## [1] 2441
## [1] "data"
## [1] "iTwiner"
## [1] 27
## [1] 20
## [1] 2452
## [1] "data"
## [1] "iTwiner"
## [1] 28
## [1] 21
## [1] 2463
## [1] "data"
## [1] "iTwiner"
## [1] 29
## [1] 22
## [1] 2474
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 30
## [1] 23
## [1] 2485
## [1] "data"
## [1] "iTwiner"
## [1] 31
## [1] 24
## [1] 2496
## [1] "data"
## [1] "iTwiner"
## [1] 32
## [1] 25
## [1] 2507
## [1] "data"
## [1] "iTwiner"
## [1] 33
## [1] 26
## [1] 2518
## [1] "data"
## [1] "iTwiner"
## [1] 34
## [1] 27
## [1] 2529
## [1] "data"
## [1] "iTwiner"
## [1] 35
## [1] 28
## [1] 2540
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 36
## [1] 28
## [1] 2551
## [1] "data"
## [1] "iTwiner"
## [1] 37
## [1] 29
## [1] 2562
## [1] "data"
## [1] "iTwiner"
## [1] 38
## [1] 30
## [1] 2573
## [1] "data"
## [1] "iTwiner"
## [1] 39
## [1] 31
## [1] 2584
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 40
## [1] 32
## [1] 2595
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 41
## [1] 32
## [1] 2606
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 42
## [1] 32
## [1] 2617
## [1] "data"
## [1] "iTwiner"
## [1] 43
## [1] 33
## [1] 2628
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 44
## [1] 33
## [1] 2639
## [1] "data"
## [1] "iTwiner"
## [1] 45
## [1] 34
## [1] 2650
## [1] "data"
## [1] "iTwiner"
## [1] 46
## [1] 35
## [1] 2661
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 47
## [1] 35
## [1] 2672
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 48
## [1] 36
## [1] 2683
## [1] "data"
## [1] "iTwiner"
## [1] 49
## [1] 37
## [1] 2694
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 50
## [1] 37
## [1] 2705
## [1] "data"
## [1] "iTwiner"
## [1] 51
## [1] 38
## [1] 2716
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 52
## [1] 39
## [1] 2727
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 53
## [1] 40
## [1] 2738
## [1] "data"
## [1] "iTwiner"
## [1] 54
## [1] 41
## [1] 2749
## [1] "data"
## [1] "iTwiner"
## [1] 55
## [1] 42
## [1] 2760
## [1] "data"
## [1] "iTwiner"
## [1] 56
## [1] 43
## [1] 2771
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 57
## [1] 44
## [1] 2782
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 58
## [1] 45
## [1] 2793
## [1] "data"
## [1] "iTwiner"
## [1] 59
## [1] 46
## [1] 2804
## [1] "data"
## [1] "iTwiner"
## [1] 60
## [1] 47
## [1] 2815
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 61
## [1] 47
## [1] 2826
## [1] "data"
## [1] "iTwiner"
## [1] 62
## [1] 48
## [1] 2837
## [1] "data"
## [1] "iTwiner"
## [1] 63
## [1] 49
## [1] 2848
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 64
## [1] 50
## [1] 2859
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 65
## [1] 50
## [1] 2870
## [1] "data"
## [1] "iTwiner"
## [1] 66
## [1] 51
## [1] 2881
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 67
## [1] 51
## [1] 2892
## [1] "data"
## [1] "iTwiner"
## [1] 68
## [1] 52
## [1] 2903
## [1] "data"
## [1] "iTwiner"
## [1] 69
## [1] 53
## [1] 2914
## [1] "data"
## [1] "iTwiner"
## [1] 70
## [1] 54
## [1] 2925
## [1] "data"
## [1] "iTwiner"
## [1] 71
## [1] 55
## [1] 2936
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 72
## [1] 55
## [1] 2947
## [1] "data"
## [1] "iTwiner"
## [1] 73
## [1] 56
## [1] 2958
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 74
## [1] 56
## [1] 2969
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 75
## [1] 56
## [1] 2980
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 76
## [1] 56
## [1] 2991
## [1] "data"
## [1] "iTwiner"
## [1] 77
## [1] 57
## [1] 3002
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 78
## [1] 57
## [1] 3013
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 79
## [1] 58
## [1] 3024
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 80
## [1] 58
## [1] 3035
## [1] "data"
## [1] "iTwiner"
## [1] 81
## [1] 59
## [1] 3046
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 82
## [1] 59
## [1] 3057
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 83
## [1] 59
## [1] 3068
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 84
## [1] 59
## [1] 3079
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 85
## [1] 59
## [1] 3090
## [1] "data"
## [1] "iTwiner não correu"
## [1] 86
## [1] 59
## [1] 3101
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 87
## [1] 59
## [1] 3112
## [1] "data"
## [1] "iTwiner"
## [1] 88
## [1] 60
## [1] 3123
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 89
## [1] 60
## [1] 3134
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 90
## [1] 60
## [1] 3145
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 91
## [1] 60
## [1] 3156
## [1] "data"
## [1] "iTwiner"
## [1] 92
## [1] 61
## [1] 3167
## [1] "data"
## [1] "iTwiner"
## [1] 93
## [1] 62
## [1] 3178
## [1] "data"
## [1] "iTwiner"
## [1] 94
## [1] 63
## [1] 3189
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 95
## [1] 63
## [1] 3200
## [1] "data"
## [1] "iTwiner"
## [1] 96
## [1] 64
## [1] 3211
## [1] "data"
## [1] "iTwiner"
## [1] 97
## [1] 65
## [1] 3222
## [1] "data"
## [1] "iTwiner"
## [1] 98
## [1] 66
## [1] 3233
## [1] "data"
## [1] "iTwiner"
## [1] 99
## [1] 67
## [1] 3244
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 100
## [1] 67
## [1] 3255
## [1] "data"
## [1] "iTwiner"
## [1] 101
## [1] 68
## [1] 3266
## [1] "data"
## [1] "iTwiner"
## [1] 102
## [1] 69
## [1] 3277
## [1] "data"
## [1] "iTwiner"
## [1] 103
## [1] 70
## [1] 3288
## [1] "data"
## [1] "iTwiner"
## [1] 104
## [1] 71
## [1] 3299
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 105
## [1] 72
## [1] 3310
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 106
## [1] 73
## [1] 3321
## [1] "data"
## [1] "iTwiner"
## [1] 107
## [1] 74
## [1] 3332
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 108
## [1] 74
## [1] 3343
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 109
## [1] 74
## [1] 3354
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 110
## [1] 74
## [1] 3365
## [1] "data"
## [1] "iTwiner"
## [1] 111
## [1] 75
## [1] 3376
## [1] "data"
## [1] "iTwiner"
## [1] 112
## [1] 76
## [1] 3387
## [1] "data"
## [1] "iTwiner"
## [1] 113
## [1] 77
## [1] 3398
## [1] "data"
## [1] "iTwiner"
## [1] 114
## [1] 78
## [1] 3409
## [1] "data"
## [1] "iTwiner"
## [1] 115
## [1] 79
## [1] 3420
## [1] "data"
## [1] "iTwiner"
## [1] 116
## [1] 80
## [1] 3431
## [1] "data"
## [1] "iTwiner"
## [1] 117
## [1] 81
## [1] 3442
## [1] "data"
## [1] "iTwiner"
## [1] 118
## [1] 82
## [1] 3453
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 119
## [1] 82
## [1] 3464
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 120
## [1] 82
## [1] 3475
## [1] "data"
## [1] "iTwiner"
## [1] 121
## [1] 83
## [1] 3486
## [1] "data"
## [1] "iTwiner"
## [1] 122
## [1] 84
## [1] 3497
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 123
## [1] 84
## [1] 3508
## [1] "data"
## [1] "iTwiner"
## [1] 124
## [1] 85
## [1] 3519
## [1] "data"
## [1] "iTwiner"
## [1] 125
## [1] 86
## [1] 3530
## [1] "data"
## [1] "iTwiner"
## [1] 126
## [1] 87
## [1] 3541
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 127
## [1] 87
## [1] 3552
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 128
## [1] 87
## [1] 3563
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 129
## [1] 87
## [1] 3574
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 130
## [1] 87
## [1] 3585
## [1] "data"
## [1] "iTwiner"
## [1] 131
## [1] 88
## [1] 3596
## [1] "data"
## [1] "iTwiner"
## [1] 132
## [1] 89
## [1] 3607
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 133
## [1] 90
## [1] 3618
## [1] "data"
## [1] "iTwiner"
## [1] 134
## [1] 91
## [1] 3629
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 135
## [1] 91
## [1] 3640
## [1] "data"
## [1] "iTwiner"
## [1] 136
## [1] 92
## [1] 3651
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 137
## [1] 92
## [1] 3662
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 138
## [1] 92
## [1] 3673
## [1] "data"
## [1] "iTwiner"
## [1] 139
## [1] 93
## [1] 3684
## [1] "data"
## [1] "iTwiner"
## [1] 140
## [1] 94
## [1] 3695
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 141
## [1] 94
## [1] 3706
## [1] "data"
## [1] "iTwiner"
## [1] 142
## [1] 95
## [1] 3717
## [1] "data"
## [1] "iTwiner"
## [1] 143
## [1] 96
## [1] 3728
## [1] "data"
## [1] "iTwiner"
## [1] 144
## [1] 97
## [1] 3739
## [1] "data"
## [1] "iTwiner"
## [1] 145
## [1] 98
## [1] 3750
## [1] "data"
## [1] "iTwiner"
## [1] 146
## [1] 99
## [1] 3761
## [1] "data"
## [1] "iTwiner"
# Column indices of the runs used in the summaries that follow: the positions
# where the train / test accuracy matrices are non-zero (row 1 feeds the EN
# variables, row 2 the iTwiner variables).
# NOTE(review): presumably a zero marks a slot that was never filled; a run
# with a genuine accuracy of 0 would be dropped too — confirm.
# NOTE(review): the train and test index sets are derived independently, so the
# train and test summaries may not cover the same runs — confirm against the
# loop that fills these matrices.
runs_en_tr <- which(acc_cox_tr[1,]!=0)
runs_itw_tr <- which(acc_cox_tr[2,]!=0)
runs_en_ts <- which(acc_cox_tes[1,]!=0)
runs_itw_ts <- which(acc_cox_tes[2,]!=0)
# number of variables selected per retained run (mean / median / sd),
# row 1 of nvar_selected_1 filtered by the EN train runs
nvar_en <- nvar_selected_1[1,]
nvar_en <- nvar_en[runs_en_tr]
mean(nvar_en)
## [1] 47.93
median(nvar_en)
## [1] 45
sd(nvar_en)
## [1] 21.33914
# row 2 of nvar_selected_1 filtered by the iTwiner train runs
nvar_tw <- nvar_selected_1[2,]
nvar_tw <- nvar_tw[runs_itw_tr]
mean(nvar_tw)
## [1] 45.26
median(nvar_tw)
## [1] 42
sd(nvar_tw)
## [1] 21.10992
# Accuracy of the regularized models, restricted to the retained runs
# (runs_*_tr / runs_*_ts): mean / median / sd. "## [1]" lines are recorded
# output; the histogram calls are left commented out as in the original.
# EN train (row 1 of acc_cox_tr)
acc_cox_tr_EN <- acc_cox_tr[1,]
acc_cox_tr_EN <- acc_cox_tr_EN[runs_en_tr]
#hist(acc_cox_tr_EN)
mean(acc_cox_tr_EN)
## [1] 0.8394737
median(acc_cox_tr_EN)
## [1] 0.8421053
sd(acc_cox_tr_EN)
## [1] 0.05931749
# EN test (row 1 of acc_cox_tes)
acc_cox_tes_EN <- acc_cox_tes[1,]
acc_cox_tes_EN <- acc_cox_tes_EN[runs_en_ts]
#hist(acc_cox_tes_EN)
mean(acc_cox_tes_EN)
## [1] 0.5865052
median(acc_cox_tes_EN)
## [1] 0.5882353
sd(acc_cox_tes_EN)
## [1] 0.1018705
# iTwiner train (row 2 of acc_cox_tr)
acc_cox_tr_iTwiner <- acc_cox_tr[2,]
acc_cox_tr_iTwiner <- acc_cox_tr_iTwiner[runs_itw_tr]
#hist(acc_cox_tr_iTwiner)
mean(acc_cox_tr_iTwiner)
## [1] 0.7313158
median(acc_cox_tr_iTwiner)
## [1] 0.7368421
sd(acc_cox_tr_iTwiner)
## [1] 0.09293091
# iTwiner test (row 2 of acc_cox_tes)
acc_cox_tes_iTwiner <- acc_cox_tes[2,]
acc_cox_tes_iTwiner <- acc_cox_tes_iTwiner[runs_itw_ts]
#hist(acc_cox_tes_iTwiner)
mean(acc_cox_tes_iTwiner)
## [1] 0.6388125
median(acc_cox_tes_iTwiner)
## [1] 0.6470588
sd(acc_cox_tes_iTwiner)
## [1] 0.05565765
# Summaries of miscl_cox_tr / miscl_cox_tes for the retained runs
# (mean / median / sd). "## [1]" lines are recorded output.
# NOTE(review): presumably misclassification counts per run — confirm.
# EN train (row 1 of miscl_cox_tr)
miscl_cox_tr_EN <- miscl_cox_tr[1,]
miscl_cox_tr_EN <- miscl_cox_tr_EN[runs_en_tr]
mean(miscl_cox_tr_EN)
## [1] 6.1
median(miscl_cox_tr_EN)
## [1] 6
sd(miscl_cox_tr_EN)
## [1] 2.254065
# EN test (row 1 of miscl_cox_tes)
miscl_cox_tes_EN <- miscl_cox_tes[1,]
miscl_cox_tes_EN <- miscl_cox_tes_EN[runs_en_ts]
mean(miscl_cox_tes_EN)
## [1] 7.029412
median(miscl_cox_tes_EN)
## [1] 7
sd(miscl_cox_tes_EN)
## [1] 1.731799
# iTwiner train (row 2 of miscl_cox_tr)
miscl_cox_tr_iTwiner <- miscl_cox_tr[2,]
miscl_cox_tr_iTwiner <- miscl_cox_tr_iTwiner[runs_itw_tr]
mean(miscl_cox_tr_iTwiner)
## [1] 10.21
median(miscl_cox_tr_iTwiner)
## [1] 10
sd(miscl_cox_tr_iTwiner)
## [1] 3.531375
# iTwiner test (row 2 of miscl_cox_tes)
miscl_cox_tes_iTwiner <- miscl_cox_tes[2,]
miscl_cox_tes_iTwiner <- miscl_cox_tes_iTwiner[runs_itw_ts]
mean(miscl_cox_tes_iTwiner)
## [1] 6.140187
median(miscl_cox_tes_iTwiner)
## [1] 6
sd(miscl_cox_tes_iTwiner)
## [1] 0.94618
# Summaries of fneg_reg_tr / fneg_reg_ts for the retained runs
# (mean / median / sd). Row and column are selected in a single indexing step.
# NOTE(review): presumably false-negative counts per run — confirm.
# EN (row 1), train then test
fneg_reg_en_train <- fneg_reg_tr[1,runs_en_tr]
mean(fneg_reg_en_train)
## [1] 6.1
median(fneg_reg_en_train)
## [1] 6
sd(fneg_reg_en_train)
## [1] 2.254065
fneg_reg_en_test <- fneg_reg_ts[1,runs_en_ts]
mean(fneg_reg_en_test)
## [1] 4.911765
median(fneg_reg_en_test)
## [1] 5
sd(fneg_reg_en_test)
## [1] 1.386662
# iTwiner (row 2), train then test
fneg_reg_iTwiner_train <- fneg_reg_tr[2,runs_itw_tr]
mean(fneg_reg_iTwiner_train)
## [1] 10.21
median(fneg_reg_iTwiner_train)
## [1] 10
sd(fneg_reg_iTwiner_train)
## [1] 3.531375
fneg_reg_iTwiner_test <- fneg_reg_ts[2,runs_itw_ts]
mean(fneg_reg_iTwiner_test)
## [1] 6.065421
median(fneg_reg_iTwiner_test)
## [1] 6
sd(fneg_reg_iTwiner_test)
## [1] 0.9035676
# Sensitivity of the regularized models for the retained runs
# (mean / median / sd). "## [1]" lines are recorded output.
# EN train (row 1 of sensitivity_cox_tr)
sensitivity_cox_tr_EN <- sensitivity_cox_tr[1,]
sensitivity_cox_tr_EN <- sensitivity_cox_tr_EN[runs_en_tr]
mean(sensitivity_cox_tr_EN)
## [1] 0.6411765
median(sensitivity_cox_tr_EN)
## [1] 0.6470588
sd(sensitivity_cox_tr_EN)
## [1] 0.132592
# EN test (row 1 of sensitivity_cox_tes)
sensitivity_cox_tes_EN <- sensitivity_cox_tes[1,]
sensitivity_cox_tes_EN <- sensitivity_cox_tes_EN[runs_en_ts]
mean(sensitivity_cox_tes_EN)
## [1] 0.3860294
median(sensitivity_cox_tes_EN)
## [1] 0.375
sd(sensitivity_cox_tes_EN)
## [1] 0.1733327
# iTwiner train (row 2 of sensitivity_cox_tr)
sensitivity_cox_tr_iTwiner <- sensitivity_cox_tr[2,]
sensitivity_cox_tr_iTwiner <- sensitivity_cox_tr_iTwiner[runs_itw_tr]
mean(sensitivity_cox_tr_iTwiner)
## [1] 0.3994118
median(sensitivity_cox_tr_iTwiner)
## [1] 0.4117647
sd(sensitivity_cox_tr_iTwiner)
## [1] 0.2077279
# iTwiner test (row 2 of sensitivity_cox_tes)
sensitivity_cox_tes_iTwiner <- sensitivity_cox_tes[2,]
sensitivity_cox_tes_iTwiner <- sensitivity_cox_tes_iTwiner[runs_itw_ts]
mean(sensitivity_cox_tes_iTwiner)
## [1] 0.2418224
median(sensitivity_cox_tes_iTwiner)
## [1] 0.25
sd(sensitivity_cox_tes_iTwiner)
## [1] 0.112946
# Specificity of the regularized models for the retained runs
# (mean / median / sd). "## [1]" lines are recorded output; note that both
# train summaries were recorded as exactly 1 with sd 0.
# EN train (row 1 of specificity_cox_tr)
specificity_cox_tr_EN <- specificity_cox_tr[1,]
specificity_cox_tr_EN <- specificity_cox_tr_EN[runs_en_tr]
mean(specificity_cox_tr_EN)
## [1] 1
median(specificity_cox_tr_EN)
## [1] 1
sd(specificity_cox_tr_EN)
## [1] 0
# EN test (row 1 of specificity_cox_tes)
specificity_cox_tes_EN <- specificity_cox_tes[1,]
specificity_cox_tes_EN <- specificity_cox_tes_EN[runs_en_ts]
mean(specificity_cox_tes_EN)
## [1] 0.7647059
median(specificity_cox_tes_EN)
## [1] 0.7777778
sd(specificity_cox_tes_EN)
## [1] 0.1664238
# iTwiner train (row 2 of specificity_cox_tr)
specificity_cox_tr_iTwiner <- specificity_cox_tr[2,]
specificity_cox_tr_iTwiner <- specificity_cox_tr_iTwiner[runs_itw_tr]
mean(specificity_cox_tr_iTwiner)
## [1] 1
median(specificity_cox_tr_iTwiner)
## [1] 1
sd(specificity_cox_tr_iTwiner)
## [1] 0
# iTwiner test (row 2 of specificity_cox_tes)
specificity_cox_tes_iTwiner <- specificity_cox_tes[2,]
specificity_cox_tes_iTwiner <- specificity_cox_tes_iTwiner[runs_itw_ts]
mean(specificity_cox_tes_iTwiner)
## [1] 0.9916926
median(specificity_cox_tes_iTwiner)
## [1] 1
sd(specificity_cox_tes_iTwiner)
## [1] 0.0395082
# AUC of the regularized models for the retained runs (mean / median / sd).
# "## [1]" lines are recorded output.
# EN train (row 1 of auc_cox_tr)
auc_cox_tr_EN <- auc_cox_tr[1,]
auc_cox_tr_EN <- auc_cox_tr_EN[runs_en_tr]
mean(auc_cox_tr_EN)
## [1] 0.8205882
median(auc_cox_tr_EN)
## [1] 0.8235294
sd(auc_cox_tr_EN)
## [1] 0.06629602
# EN test (row 1 of auc_cox_tes)
auc_cox_tes_EN <- auc_cox_tes[1,]
auc_cox_tes_EN <- auc_cox_tes_EN[runs_en_ts]
mean(auc_cox_tes_EN)
## [1] 0.5834014
median(auc_cox_tes_EN)
## [1] 0.5763889
sd(auc_cox_tes_EN)
## [1] 0.09536522
# iTwiner train (row 2 of auc_cox_tr)
auc_cox_tr_iTwiner <- auc_cox_tr[2,]
auc_cox_tr_iTwiner <- auc_cox_tr_iTwiner[runs_itw_tr]
mean(auc_cox_tr_iTwiner)
## [1] 0.6997059
median(auc_cox_tr_iTwiner)
## [1] 0.7058824
sd(auc_cox_tr_iTwiner)
## [1] 0.103864
# iTwiner test (row 2 of auc_cox_tes)
auc_cox_tes_iTwiner <- auc_cox_tes[2,]
auc_cox_tes_iTwiner <- auc_cox_tes_iTwiner[runs_itw_ts]
mean(auc_cox_tes_iTwiner)
## [1] 0.6167575
median(auc_cox_tes_iTwiner)
## [1] 0.625
sd(auc_cox_tes_iTwiner)
## [1] 0.05843036
Variables always selected
# Variables selected in every retained run: take the per-run selections at the
# retained test-run indices and intersect them all with Reduce(intersect, ...).
# NOTE(review): the selections are indexed with the *test* run sets
# (runs_en_ts / runs_itw_ts) — confirm that var_selected_en1 /
# var_selected_iTwiner1 are indexed the same way as the test matrices.
var_selected_alw_select_en <- var_selected_en1[runs_en_ts]
var_selected_alw_select_en1 <- Reduce(intersect,var_selected_alw_select_en)
print(paste("variables always selected by EN = ",length(var_selected_alw_select_en1)))
## [1] "variables always selected by EN = 0"
var_selected_alw_select_iTwiner <- var_selected_iTwiner1[runs_itw_ts]
var_selected_alw_select_iTwiner1 <- Reduce(intersect,var_selected_alw_select_iTwiner)
print(paste("variables always selected by iTwiner = ",length(var_selected_alw_select_iTwiner1)))
## [1] "variables always selected by iTwiner = 2"
Variables selected in more than 50% of the bootstrap samples
# Variables chosen in more than half of the retained runs.
# For each method: count how often every variable appears across the per-run
# selections, then keep the rows whose count exceeds 50% of the number of runs.
# "##" lines are the output recorded when the document was rendered.

# --- EN ---
l <- length(var_selected_alw_select_en)
var_selected_50_select_en <- as.data.frame(
  table(unlist(var_selected_alw_select_en))
)
var_selected_50_select_en <-
  var_selected_50_select_en[var_selected_50_select_en$Freq > 0.50 * l, ]
print(paste("variables selected 50% by EN = ", nrow(var_selected_50_select_en)))
## [1] "variables selected 50% by EN = 4"
var_selected_50_select_en$Var1
## [1] CD200 MIR602 NCOR1P4 RLN3
## 1392 Levels: A2M-AS1 AANAT ABITRAMP1 ABR ACP3 ACRBP ACTBP2 ACTBP9 ... ZSCAN12P1

# --- iTwiner ---
l <- length(var_selected_alw_select_iTwiner)
var_selected_50_select_iTwiner <- as.data.frame(
  table(unlist(var_selected_alw_select_iTwiner))
)
var_selected_50_select_iTwiner <-
  var_selected_50_select_iTwiner[var_selected_50_select_iTwiner$Freq > 0.50 * l, ]
print(paste("variables selected 50% by iTwiner = ", nrow(var_selected_50_select_iTwiner)))
## [1] "variables selected 50% by iTwiner = 25"
var_selected_50_select_iTwiner$Var1
## [1] CHMP1AP1 DNTT EEF1B2P6 GZMAP1 HAUS5-DT HORMAD2 HSPD1P7
## [8] KCTD9P3 LINC02543 LRIT1 MIR3907 MIR564 MIR602 NDUFA5P10
## [15] OR1S2 OR51K1P PRSS57 RAC1P3 RN7SKP32 RNU6-428P RNU6-552P
## [22] RPL32P17 TPT1P3 TRBV11-1 XRCC6P2
## 237 Levels: ABCD1P3 ACTN4P1 ALDH7A1P4 ANKRD20A10P API5P1 APOOP4 ... ZNF725P
# Rank every variable by how many retained runs selected it and export the 100
# most frequent ones per method; the first 50 are kept in top50_en / top50_itw.
# Note: this rebuilds var_selected_50_select_* from ALL selected variables, so
# from here on those tables are no longer restricted to the > 50% subset.
# head() is used instead of [1:100, ] / [1:50] so that a list with fewer
# entries is returned as-is rather than padded with NA rows; with at least
# 100 (50) entries the result is identical.

# --- EN ---
var_selected_50_select_en <- table(unlist(var_selected_alw_select_en))
var_selected_50_select_en <- as.data.frame(var_selected_50_select_en)
var_selected_50_select_en <- var_selected_50_select_en[
  order(var_selected_50_select_en$Freq, decreasing = TRUE),
]
hist(var_selected_50_select_en$Freq)
top100_en <- head(var_selected_50_select_en, 100)
top100_en <- top100_en$Var1
top50_en <- head(top100_en, 50)
# as.data.frame(top100_en) names the single column "top100_en" in the workbook
top100_en <- as.data.frame(top100_en)
write_xlsx(top100_en,"List_top100_en_d2.xlsx")

# --- iTwiner ---
var_selected_50_select_iTwiner <- table(unlist(var_selected_alw_select_iTwiner))
var_selected_50_select_iTwiner <- as.data.frame(var_selected_50_select_iTwiner)
var_selected_50_select_iTwiner <- var_selected_50_select_iTwiner[
  order(var_selected_50_select_iTwiner$Freq, decreasing = TRUE),
]
hist(var_selected_50_select_iTwiner$Freq)
top100_itw <- head(var_selected_50_select_iTwiner, 100)
top100_itw <- top100_itw$Var1
top50_itw <- head(top100_itw, 50)
top100_itw <- as.data.frame(top100_itw)
write_xlsx(top100_itw,"List_top100_itw_d2.xlsx")
Variables in common between EN and iTwiner (selected at least once by each method)
# Variables of the iTwiner frequency table that also appear in the EN table.
# NOTE(review): despite the "_50_" in the names, both var_selected_50_select_*
# tables were rebuilt just above from every selected variable (no > 50%
# filter), so this is the overlap of variables selected at least once by each
# method, listed in iTwiner frequency order — confirm this is intended.
common_var_selected_50_en_iTwiner <- var_selected_50_select_iTwiner$Var1[which(var_selected_50_select_iTwiner$Var1 %in% var_selected_50_select_en$Var1)]
length(common_var_selected_50_en_iTwiner)
## [1] 50
common_var_selected_50_en_iTwiner
## [1] MIR602 RAC1P3 XRCC6P2 MIR3907 KCTD9P3
## [6] TRBV11-1 GZMAP1 PRSS57 LRIT1 LINC02543
## [11] RNU6-428P HAUS5-DT DNTT RN7SKP32 TPT1P3
## [16] OR1S2 RPL32P17 HORMAD2 EEF1B2P6 MIR564
## [21] SULT6B2P MTND1P22 RNU1-18P TRBJ1-6 SCDP1
## [26] LINC02868 MIR6508 LINC01100 MAGEB6 RNU6-542P
## [31] MIR376B TRAJ40 TRAJ58 RNU7-193P MIR6792
## [36] ANKRD20A10P MIR4301 DBF4P3 MIR509-2 RPL34P27
## [41] CLUHP5 DEFB4B KDM4F LINC01927 CDH12P4
## [46] HMGB1P50 IGKV1OR2-118 MIR6729 RNA5SP42 RNU4-92P
## 237 Levels: ABCD1P3 ACTN4P1 ALDH7A1P4 ANKRD20A10P API5P1 APOOP4 ... ZNF725P
# Differential expression (edgeR, design ~group) restricted to the genes most
# frequently selected by EN and iTwiner.
xdata <- rnaseq1[order(row.names(rnaseq1)), ]
# Union of the two top-50 lists. unique() is required: a gene present in both
# lists would otherwise be selected twice and tested twice, which inflates the
# number of tests and of reported DEGs (see XRCC6P2 / XRCC6P2.1 with identical
# statistics in the recorded output below).
nomesgenes <- unique(c(as.vector(top50_en), as.vector(top50_itw)))
xdata <- xdata[, nomesgenes, drop = FALSE]
rownames(DATASET1_bal) <- DATASET1_bal$ID
ydata <- as.data.frame(DATASET1_bal[order(row.names(DATASET1_bal)), ])
rownames(ydata) <- ydata$ID
# keep only the samples present in both tables (both are sorted by sample ID)
xdata <- xdata[rownames(xdata) %in% rownames(ydata), , drop = FALSE]
ydata <- as.data.frame(ydata[rownames(ydata) %in% rownames(xdata), ])
# keep features with standard deviation > 0
xdata <- xdata[
  ,
  vapply(seq_len(ncol(xdata)), function(ix) sd(xdata[, ix]), numeric(1)) != 0,
  drop = FALSE
]
# DGEList() below receives the transposed table as `counts`
xdata <- t(xdata)
group <- as.factor(ydata$class)
class <- as.data.frame(ydata$class)
edgeR.DGElist <- DGEList(counts = xdata, group = group)
# remove genes that do not have one count per million in at least 5 samples
keep <- rowSums(cpm(edgeR.DGElist) >= 1) >= 5
edgeR.DGElist <- edgeR.DGElist[keep, ]
# specify the design setup
design <- model.matrix(~group)
# estimate the dispersion for all read counts across all samples
edgeR.DGElist <- estimateDisp(edgeR.DGElist, design)
# fit the negative binomial model
edger_fit <- glmFit(edgeR.DGElist, design)
# perform the testing for every gene using the neg. binomial model
# (no coefficient is specified; the recorded output reports "groupPm")
edger_lrt <- glmLRT(edger_fit)
# NOTE: every "##" line below was recorded BEFORE the unique() de-duplication
# above was introduced (100 columns, including the duplicate XRCC6P2.1);
# counts and FDR values will differ once the document is re-rendered.
summary(decideTests(edger_lrt))
## groupPm
## Down 2
## NotSig 95
## Up 3
# extract results from edger_lrt$table plus BH-adjusted p-values
DGE.results_edgeR <- topTags(edger_lrt, n = Inf, sort.by = "PValue", adjust.method = "BH")
topTags(DGE.results_edgeR) #table with the top10 DEGs
## Coefficient: groupPm
## logFC logCPM LR PValue FDR
## RN7SKP32 2.417523 10.98057 18.865397 1.402743e-05 0.001402743
## TPT1P3 2.034975 10.93476 13.937757 1.889644e-04 0.009448219
## XRCC6P2 -2.524191 11.11118 11.000408 9.109182e-04 0.022772954
## XRCC6P2.1 -2.524191 11.11118 11.000408 9.109182e-04 0.022772954
## MIR6508 1.870106 10.99754 10.186838 1.414466e-03 0.028289327
## LINC01100 -2.410049 11.08279 7.675900 5.596316e-03 0.089843327
## TMPRSS11F -2.376134 11.07022 7.277603 6.981966e-03 0.089843327
## OR51K1P -2.170233 11.04336 7.225512 7.187466e-03 0.089843327
## KIR2DL4 -1.338876 12.93927 6.650194 9.914503e-03 0.094843330
## MIR7854 1.225777 15.14133 6.396702 1.143326e-02 0.094843330
genes_deg <- DGE.results_edgeR$table
# genes with FDR < 0.05 are reported as differentially expressed
genes_deg <- genes_deg[which(genes_deg$FDR < 0.05),]
dim(genes_deg)#genes found to be differentially expressed
## [1] 5 5
# write_xlsx() does not export row names, so copy the gene IDs into a column
genes_deg$row <- row.names(genes_deg)
write_xlsx(genes_deg,"genes_deg_enitw_d2.xlsx")
# genes with positive logFC for the tested coefficient (groupPm)
high <- genes_deg[genes_deg$logFC > 0,]
dim(high)
## [1] 3 6
# genes with negative logFC for the tested coefficient (groupPm)
low <- genes_deg[genes_deg$logFC < 0,]
dim(low)
## [1] 2 6
# Rebuild the expression table and the class labels for the genes listed in
# top50_en.
xdata <- xdataT
# Label table: class in column 1, sample id in column 2 and as row names.
# NOTE(review): assumes clinic1 and DATASET1 list the samples in the same
# order - confirm.
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
#ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the classes: "P" -> 0, "Pm" -> 1.
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep the samples present in both tables, then sort both by sample id so
# that row i of xdata and row i of ydata refer to the same sample.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
colnames(ydata) <- c("class", "id")
# Restrict the predictors to the selected genes; the original gene names are
# kept in nomesgenes because the columns are renamed below.
names <- as.vector(top50_en)
xdata_en <- as.data.frame(xdata[, names])
nomesgenes <- colnames(xdata_en)
# Rename the predictors Var1..VarK, where K is the actual number of selected
# genes (previously hard-coded as 1:50, which fails for any other count).
colnames(xdata_en) <- paste0("Var", seq_len(ncol(xdata_en)))
xdata_en$type <- as.factor(ydata$class)
ydata$class <- as.numeric(ydata$class)
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# acc: mean, median and sd of rows 1-5 of acc_enplus_train, kept as acc_trees,
# acc_svm, acc_svmR, acc_logs and acc_rf. Each "## [1]" line is the recorded
# output of the statement above it.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where acc_enplus_train is filled.
acc_trees <- acc_enplus_train[1,]
#hist(acc_trees)
mean(acc_trees)
## [1] 0.9939474
median(acc_trees)
## [1] 1
sd(acc_trees)
## [1] 0.01113033
acc_svm <- acc_enplus_train[2,]
#hist(acc_svm)
mean(acc_svm)
## [1] 0.9321053
median(acc_svm)
## [1] 0.9210526
sd(acc_svm)
## [1] 0.02596254
acc_svmR <- acc_enplus_train[3,]
#hist(acc_svmR)
mean(acc_svmR)
## [1] 0.8781579
median(acc_svmR)
## [1] 0.9210526
sd(acc_svmR)
## [1] 0.07689654
acc_logs <- acc_enplus_train[4,]
#hist(acc_logs)
mean(acc_logs)
## [1] 1
median(acc_logs)
## [1] 1
sd(acc_logs)
## [1] 0
acc_rf <- acc_enplus_train[5,]
#hist(acc_rf)
mean(acc_rf)
## [1] 1
median(acc_rf)
## [1] 1
sd(acc_rf)
## [1] 0
# auc: mean, median and sd of rows 1-5 of auc_enplus_train, kept as auc_trees,
# auc_svm, auc_svmR, auc_logs and auc_rf.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where auc_enplus_train is filled.
auc_trees <- auc_enplus_train[1,]
#hist(auc_trees)
mean(auc_trees)
## [1] 0.9941317
median(auc_trees)
## [1] 1
sd(auc_trees)
## [1] 0.01086274
auc_svm <- auc_enplus_train[2,]
#hist(auc_svm)
mean(auc_svm)
## [1] 0.927591
median(auc_svm)
## [1] 0.9173669
sd(auc_svm)
## [1] 0.02755802
auc_svmR <- auc_enplus_train[3,]
#hist(auc_svmR)
mean(auc_svmR)
## [1] 0.8792297
median(auc_svmR)
## [1] 0.9117647
sd(auc_svmR)
## [1] 0.06996116
auc_logs <- auc_enplus_train[4,]
#hist(auc_logs)
mean(auc_logs)
## [1] 1
median(auc_logs)
## [1] 1
sd(auc_logs)
## [1] 0
auc_rf <- auc_enplus_train[5,]
#hist(auc_rf)
mean(auc_rf)
## [1] 1
median(auc_rf)
## [1] 1
sd(auc_rf)
## [1] 0
# miscl: mean, median and sd of rows 1-5 of miscl_enplus_train, kept as
# miscl_trees, miscl_svm, miscl_svmR, miscl_logs and miscl_rf.
# NOTE(review): presumably the number of misclassified samples per run on the
# training data; confirm where miscl_enplus_train is filled.
miscl_trees <- miscl_enplus_train[1,]
#hist(miscl_trees)
mean(miscl_trees)
## [1] 0.23
median(miscl_trees)
## [1] 0
sd(miscl_trees)
## [1] 0.4229526
miscl_svm <- miscl_enplus_train[2,]
#hist(miscl_svm)
mean(miscl_svm)
## [1] 2.58
median(miscl_svm)
## [1] 3
sd(miscl_svm)
## [1] 0.9865766
miscl_svmR <- miscl_enplus_train[3,]
#hist(miscl_svmR)
mean(miscl_svmR)
## [1] 4.63
median(miscl_svmR)
## [1] 3
sd(miscl_svmR)
## [1] 2.922069
miscl_logs <- miscl_enplus_train[4,]
#hist(miscl_logs)
mean(miscl_logs)
## [1] 0
median(miscl_logs)
## [1] 0
sd(miscl_logs)
## [1] 0
miscl_rf <- miscl_enplus_train[5,]
#hist(miscl_rf)
mean(miscl_rf)
## [1] 0
median(miscl_rf)
## [1] 0
sd(miscl_rf)
## [1] 0
# sensitivity: mean, median and sd of rows 1-5 of sensitivity_enplus_train,
# kept as sensitivity_trees, sensitivity_svm, sensitivity_svmR,
# sensitivity_logs and sensitivity_rf.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where sensitivity_enplus_train is filled.
sensitivity_trees <- sensitivity_enplus_train[1,]
#hist(sensitivity_trees)
mean(sensitivity_trees)
## [1] 0.9958824
median(sensitivity_trees)
## [1] 1
sd(sensitivity_trees)
## [1] 0.01508426
sensitivity_svm <- sensitivity_enplus_train[2,]
#hist(sensitivity_svm)
mean(sensitivity_svm)
## [1] 0.8847059
median(sensitivity_svm)
## [1] 0.8823529
sd(sensitivity_svm)
## [1] 0.04797096
sensitivity_svmR <- sensitivity_enplus_train[3,]
#hist(sensitivity_svmR)
mean(sensitivity_svmR)
## [1] 0.8894118
median(sensitivity_svmR)
## [1] 0.8823529
sd(sensitivity_svmR)
## [1] 0.0610282
sensitivity_logs <- sensitivity_enplus_train[4,]
#hist(sensitivity_logs)
mean(sensitivity_logs)
## [1] 1
median(sensitivity_logs)
## [1] 1
sd(sensitivity_logs)
## [1] 0
sensitivity_rf <- sensitivity_enplus_train[5,]
#hist(sensitivity_rf)
mean(sensitivity_rf)
## [1] 1
median(sensitivity_rf)
## [1] 1
sd(sensitivity_rf)
## [1] 0
# specificity: mean, median and sd of rows 1-5 of specificity_enplus_train,
# kept as specificity_trees, specificity_svm, specificity_svmR,
# specificity_logs and specificity_rf.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where specificity_enplus_train is filled.
specificity_trees <- specificity_enplus_train[1,]
#hist(specificity_trees)
mean(specificity_trees)
## [1] 0.992381
median(specificity_trees)
## [1] 1
sd(specificity_trees)
## [1] 0.01754538
specificity_svm <- specificity_enplus_train[2,]
#hist(specificity_svm)
mean(specificity_svm)
## [1] 0.9704762
median(specificity_svm)
## [1] 0.952381
sd(specificity_svm)
## [1] 0.02323008
specificity_svmR <- specificity_enplus_train[3,]
#hist(specificity_svmR)
mean(specificity_svmR)
## [1] 0.8690476
median(specificity_svmR)
## [1] 0.952381
sd(specificity_svmR)
## [1] 0.1464789
specificity_logs <- specificity_enplus_train[4,]
#hist(specificity_logs)
mean(specificity_logs)
## [1] 1
median(specificity_logs)
## [1] 1
sd(specificity_logs)
## [1] 0
specificity_rf <- specificity_enplus_train[5,]
#hist(specificity_rf)
mean(specificity_rf)
## [1] 1
median(specificity_rf)
## [1] 1
sd(specificity_rf)
## [1] 0
# fneg: mean, median and sd of rows 1-5 of fneg_enplus_train, kept as
# fneg_trees, fneg_svm, fneg_svmR, fneg_logs and fneg_rf.
# NOTE(review): presumably the number of false negatives per run on the
# training data; confirm where fneg_enplus_train is filled.
fneg_trees <- fneg_enplus_train[1,]
#hist(fneg_trees)
mean(fneg_trees)
## [1] 0.07
median(fneg_trees)
## [1] 0
sd(fneg_trees)
## [1] 0.2564324
fneg_svm <- fneg_enplus_train[2,]
#hist(fneg_svm)
mean(fneg_svm)
## [1] 1.96
median(fneg_svm)
## [1] 2
sd(fneg_svm)
## [1] 0.8155063
fneg_svmR <- fneg_enplus_train[3,]
#hist(fneg_svmR)
mean(fneg_svmR)
## [1] 1.88
median(fneg_svmR)
## [1] 2
sd(fneg_svmR)
## [1] 1.037479
fneg_logs <- fneg_enplus_train[4,]
#hist(fneg_logs)
mean(fneg_logs)
## [1] 0
median(fneg_logs)
## [1] 0
sd(fneg_logs)
## [1] 0
fneg_rf <- fneg_enplus_train[5,]
#hist(fneg_rf)
mean(fneg_rf)
## [1] 0
median(fneg_rf)
## [1] 0
sd(fneg_rf)
## [1] 0
# acc: mean, median and sd of rows 1-5 of acc_enplus. This reassigns
# acc_trees, acc_svm, acc_svmR, acc_logs and acc_rf, replacing the values
# taken from acc_enplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of acc_enplus_train;
# confirm where acc_enplus is filled.
acc_trees <- acc_enplus[1,]
#hist(acc_trees)
mean(acc_trees)
## [1] 0.6611765
median(acc_trees)
## [1] 0.6470588
sd(acc_trees)
## [1] 0.09277112
acc_svm <- acc_enplus[2,]
#hist(acc_svm)
mean(acc_svm)
## [1] 0.7217647
median(acc_svm)
## [1] 0.7058824
sd(acc_svm)
## [1] 0.0796935
acc_svmR <- acc_enplus[3,]
#hist(acc_svmR)
mean(acc_svmR)
## [1] 0.7723529
median(acc_svmR)
## [1] 0.7647059
sd(acc_svmR)
## [1] 0.09483351
acc_logs <- acc_enplus[4,]
#hist(acc_logs)
mean(acc_logs)
## [1] 0.7070588
median(acc_logs)
## [1] 0.7058824
sd(acc_logs)
## [1] 0.09234062
acc_rf <- acc_enplus[5,]
#hist(acc_rf)
mean(acc_rf)
## [1] 0.7835294
median(acc_rf)
## [1] 0.7647059
sd(acc_rf)
## [1] 0.07793067
# auc: mean, median and sd of rows 1-5 of auc_enplus. This reassigns
# auc_trees, auc_svm, auc_svmR, auc_logs and auc_rf, replacing the values
# taken from auc_enplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of auc_enplus_train;
# confirm where auc_enplus is filled.
auc_trees <- auc_enplus[1,]
#hist(auc_trees)
mean(auc_trees)
## [1] 0.6629861
median(auc_trees)
## [1] 0.6527778
sd(auc_trees)
## [1] 0.08695739
auc_svm <- auc_enplus[2,]
#hist(auc_svm)
mean(auc_svm)
## [1] 0.7148611
median(auc_svm)
## [1] 0.7048611
sd(auc_svm)
## [1] 0.0820896
auc_svmR <- auc_enplus[3,]
#hist(auc_svmR)
mean(auc_svmR)
## [1] 0.7766667
median(auc_svmR)
## [1] 0.7708333
sd(auc_svmR)
## [1] 0.0915169
auc_logs <- auc_enplus[4,]
#hist(auc_logs)
mean(auc_logs)
## [1] 0.7030556
median(auc_logs)
## [1] 0.7013889
sd(auc_logs)
## [1] 0.09303933
auc_rf <- auc_enplus[5,]
#hist(auc_rf)
mean(auc_rf)
## [1] 0.7770833
median(auc_rf)
## [1] 0.7638889
sd(auc_rf)
## [1] 0.0787563
# miscl: mean, median and sd of rows 1-5 of miscl_enplus. This reassigns
# miscl_trees, miscl_svm, miscl_svmR, miscl_logs and miscl_rf, replacing the
# values taken from miscl_enplus_train earlier in the file.
# NOTE(review): presumably the number of misclassified held-out samples per
# run; confirm where miscl_enplus is filled.
miscl_trees <- miscl_enplus[1,]
#hist(miscl_trees)
mean(miscl_trees)
## [1] 5.76
median(miscl_trees)
## [1] 6
sd(miscl_trees)
## [1] 1.577109
miscl_svm <- miscl_enplus[2,]
#hist(miscl_svm)
mean(miscl_svm)
## [1] 4.73
median(miscl_svm)
## [1] 5
sd(miscl_svm)
## [1] 1.354789
miscl_svmR <- miscl_enplus[3,]
#hist(miscl_svmR)
mean(miscl_svmR)
## [1] 3.87
median(miscl_svmR)
## [1] 4
sd(miscl_svmR)
## [1] 1.61217
miscl_logs <- miscl_enplus[4,]
#hist(miscl_logs)
mean(miscl_logs)
## [1] 4.98
median(miscl_logs)
## [1] 5
sd(miscl_logs)
## [1] 1.569791
miscl_rf <- miscl_enplus[5,]
#hist(miscl_rf)
mean(miscl_rf)
## [1] 3.68
median(miscl_rf)
## [1] 4
sd(miscl_rf)
## [1] 1.324821
# sensitivity: mean, median and sd of rows 1-5 of sensitivity_enplus. This
# reassigns sensitivity_trees, sensitivity_svm, sensitivity_svmR,
# sensitivity_logs and sensitivity_rf, replacing the values taken from
# sensitivity_enplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of
# sensitivity_enplus_train; confirm where sensitivity_enplus is filled.
sensitivity_trees <- sensitivity_enplus[1,]
#hist(sensitivity_trees)
mean(sensitivity_trees)
## [1] 0.64375
median(sensitivity_trees)
## [1] 0.625
sd(sensitivity_trees)
## [1] 0.1602702
sensitivity_svm <- sensitivity_enplus[2,]
#hist(sensitivity_svm)
mean(sensitivity_svm)
## [1] 0.5975
median(sensitivity_svm)
## [1] 0.625
sd(sensitivity_svm)
## [1] 0.1653165
sensitivity_svmR <- sensitivity_enplus[3,]
#hist(sensitivity_svmR)
mean(sensitivity_svmR)
## [1] 0.85
median(sensitivity_svmR)
## [1] 0.875
sd(sensitivity_svmR)
## [1] 0.1204998
sensitivity_logs <- sensitivity_enplus[4,]
#hist(sensitivity_logs)
mean(sensitivity_logs)
## [1] 0.635
median(sensitivity_logs)
## [1] 0.625
sd(sensitivity_logs)
## [1] 0.1663634
sensitivity_rf <- sensitivity_enplus[5,]
#hist(sensitivity_rf)
mean(sensitivity_rf)
## [1] 0.6675
median(sensitivity_rf)
## [1] 0.75
sd(sensitivity_rf)
## [1] 0.1296119
# specificity: mean, median and sd of rows 1-5 of specificity_enplus. This
# reassigns specificity_trees, specificity_svm, specificity_svmR,
# specificity_logs and specificity_rf, replacing the values taken from
# specificity_enplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of
# specificity_enplus_train; confirm where specificity_enplus is filled.
specificity_trees <- specificity_enplus[1,]
#hist(specificity_trees)
mean(specificity_trees)
## [1] 0.6766667
median(specificity_trees)
## [1] 0.6666667
sd(specificity_trees)
## [1] 0.1679487
specificity_svm <- specificity_enplus[2,]
#hist(specificity_svm)
mean(specificity_svm)
## [1] 0.8322222
median(specificity_svm)
## [1] 0.8888889
sd(specificity_svm)
## [1] 0.1122223
specificity_svmR <- specificity_enplus[3,]
#hist(specificity_svmR)
mean(specificity_svmR)
## [1] 0.7033333
median(specificity_svmR)
## [1] 0.6666667
sd(specificity_svmR)
## [1] 0.1835313
specificity_logs <- specificity_enplus[4,]
#hist(specificity_logs)
mean(specificity_logs)
## [1] 0.7711111
median(specificity_logs)
## [1] 0.7777778
sd(specificity_logs)
## [1] 0.1463016
specificity_rf <- specificity_enplus[5,]
#hist(specificity_rf)
mean(specificity_rf)
## [1] 0.8866667
median(specificity_rf)
## [1] 0.8888889
sd(specificity_rf)
## [1] 0.1070876
# fneg: mean, median and sd of rows 1-5 of fneg_enplus. This reassigns
# fneg_trees, fneg_svm, fneg_svmR, fneg_logs and fneg_rf, replacing the values
# taken from fneg_enplus_train earlier in the file.
# NOTE(review): presumably the number of false negatives among the held-out
# samples per run; confirm where fneg_enplus is filled.
fneg_trees <- fneg_enplus[1,]
#hist(fneg_trees)
mean(fneg_trees)
## [1] 2.85
median(fneg_trees)
## [1] 3
sd(fneg_trees)
## [1] 1.282162
fneg_svm <- fneg_enplus[2,]
#hist(fneg_svm)
mean(fneg_svm)
## [1] 3.22
median(fneg_svm)
## [1] 3
sd(fneg_svm)
## [1] 1.322532
fneg_svmR <- fneg_enplus[3,]
#hist(fneg_svmR)
mean(fneg_svmR)
## [1] 1.2
median(fneg_svmR)
## [1] 1
sd(fneg_svmR)
## [1] 0.9639984
fneg_logs <- fneg_enplus[4,]
#hist(fneg_logs)
mean(fneg_logs)
## [1] 2.92
median(fneg_logs)
## [1] 3
sd(fneg_logs)
## [1] 1.330907
fneg_rf <- fneg_enplus[5,]
#hist(fneg_rf)
mean(fneg_rf)
## [1] 2.66
median(fneg_rf)
## [1] 2
sd(fneg_rf)
## [1] 1.036895
# Rebuild the expression table and the class labels for the genes listed in
# top50_itw.
xdata <- xdataT
# Label table: class in column 1, sample id in column 2 and as row names.
# NOTE(review): assumes clinic1 and DATASET1 list the samples in the same
# order - confirm.
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
#ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the classes: "P" -> 0, "Pm" -> 1.
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep the samples present in both tables, then sort both by sample id so
# that row i of xdata and row i of ydata refer to the same sample.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
colnames(ydata) <- c("class", "id")
# Restrict the predictors to the selected genes; the original gene names are
# kept in nomesgenes because the columns are renamed below.
names <- as.vector(top50_itw)
xdata_iTwiner <- as.data.frame(xdata[, names])
nomesgenes <- colnames(xdata_iTwiner)
# Rename the predictors Var1..VarK, where K is the actual number of selected
# genes (previously hard-coded as 1:50, which fails for any other count).
colnames(xdata_iTwiner) <- paste0("Var", seq_len(ncol(xdata_iTwiner)))
xdata_iTwiner$type <- as.factor(ydata$class)
ydata$class <- as.numeric(ydata$class)
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# acc: mean, median and sd of rows 1-5 of acc_tcoxplus_train. This reassigns
# acc_trees, acc_svm, acc_svmR, acc_logs and acc_rf, replacing the values
# taken from acc_enplus earlier in the file.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where acc_tcoxplus_train is filled.
acc_trees <- acc_tcoxplus_train[1,]
#hist(acc_trees)
mean(acc_trees)
## [1] 0.9755263
median(acc_trees)
## [1] 0.9736842
sd(acc_trees)
## [1] 0.02091094
acc_svm <- acc_tcoxplus_train[2,]
#hist(acc_svm)
mean(acc_svm)
## [1] 0.8694737
median(acc_svm)
## [1] 0.8684211
sd(acc_svm)
## [1] 0.02746558
acc_svmR <- acc_tcoxplus_train[3,]
#hist(acc_svmR)
mean(acc_svmR)
## [1] 0.8918421
median(acc_svmR)
## [1] 0.8947368
sd(acc_svmR)
## [1] 0.0776425
acc_logs <- acc_tcoxplus_train[4,]
#hist(acc_logs)
mean(acc_logs)
## [1] 0.9957895
median(acc_logs)
## [1] 1
sd(acc_logs)
## [1] 0.00969613
acc_rf <- acc_tcoxplus_train[5,]
#hist(acc_rf)
mean(acc_rf)
## [1] 0.9634211
median(acc_rf)
## [1] 0.9736842
sd(acc_rf)
## [1] 0.02421001
# auc: mean, median and sd of rows 1-5 of auc_tcoxplus_train. This reassigns
# auc_trees, auc_svm, auc_svmR, auc_logs and auc_rf, replacing the values
# taken from auc_enplus earlier in the file.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where auc_tcoxplus_train is filled.
auc_trees <- auc_tcoxplus_train[1,]
#hist(auc_trees)
mean(auc_trees)
## [1] 0.9756723
median(auc_trees)
## [1] 0.9761905
sd(auc_trees)
## [1] 0.02088274
auc_svm <- auc_tcoxplus_train[2,]
#hist(auc_svm)
mean(auc_svm)
## [1] 0.8541176
median(auc_svm)
## [1] 0.8529412
sd(auc_svm)
## [1] 0.03069682
auc_svmR <- auc_tcoxplus_train[3,]
#hist(auc_svmR)
mean(auc_svmR)
## [1] 0.8822549
median(auc_svmR)
## [1] 0.8823529
sd(auc_svmR)
## [1] 0.08275711
auc_logs <- auc_tcoxplus_train[4,]
#hist(auc_logs)
mean(auc_logs)
## [1] 0.9957983
median(auc_logs)
## [1] 1
sd(auc_logs)
## [1] 0.009740052
auc_rf <- auc_tcoxplus_train[5,]
#hist(auc_rf)
mean(auc_rf)
## [1] 0.9591176
median(auc_rf)
## [1] 0.9705882
sd(auc_rf)
## [1] 0.02705824
# miscl: mean, median and sd of rows 1-5 of miscl_tcoxplus_train. This
# reassigns miscl_trees, miscl_svm, miscl_svmR, miscl_logs and miscl_rf,
# replacing the values taken from miscl_enplus earlier in the file.
# NOTE(review): presumably the number of misclassified samples per run on the
# training data; confirm where miscl_tcoxplus_train is filled.
miscl_trees <- miscl_tcoxplus_train[1,]
#hist(miscl_trees)
mean(miscl_trees)
## [1] 0.93
median(miscl_trees)
## [1] 1
sd(miscl_trees)
## [1] 0.7946157
miscl_svm <- miscl_tcoxplus_train[2,]
#hist(miscl_svm)
mean(miscl_svm)
## [1] 4.96
median(miscl_svm)
## [1] 5
sd(miscl_svm)
## [1] 1.043692
miscl_svmR <- miscl_tcoxplus_train[3,]
#hist(miscl_svmR)
mean(miscl_svmR)
## [1] 4.11
median(miscl_svmR)
## [1] 4
sd(miscl_svmR)
## [1] 2.950415
miscl_logs <- miscl_tcoxplus_train[4,]
#hist(miscl_logs)
mean(miscl_logs)
## [1] 0.16
median(miscl_logs)
## [1] 0
sd(miscl_logs)
## [1] 0.3684529
miscl_rf <- miscl_tcoxplus_train[5,]
#hist(miscl_rf)
mean(miscl_rf)
## [1] 1.39
median(miscl_rf)
## [1] 1
sd(miscl_rf)
## [1] 0.9199802
# sensitivity: mean, median and sd of rows 1-5 of sensitivity_tcoxplus_train.
# This reassigns sensitivity_trees, sensitivity_svm, sensitivity_svmR,
# sensitivity_logs and sensitivity_rf, replacing the values taken from
# sensitivity_enplus earlier in the file.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where sensitivity_tcoxplus_train is filled.
sensitivity_trees <- sensitivity_tcoxplus_train[1,]
#hist(sensitivity_trees)
mean(sensitivity_trees)
## [1] 0.9770588
median(sensitivity_trees)
## [1] 1
sd(sensitivity_trees)
## [1] 0.03116576
sensitivity_svm <- sensitivity_tcoxplus_train[2,]
#hist(sensitivity_svm)
mean(sensitivity_svm)
## [1] 0.7082353
median(sensitivity_svm)
## [1] 0.7058824
sd(sensitivity_svm)
## [1] 0.06139365
sensitivity_svmR <- sensitivity_tcoxplus_train[3,]
#hist(sensitivity_svmR)
mean(sensitivity_svmR)
## [1] 0.7911765
median(sensitivity_svmR)
## [1] 0.7941176
sd(sensitivity_svmR)
## [1] 0.1465827
sensitivity_logs <- sensitivity_tcoxplus_train[4,]
#hist(sensitivity_logs)
mean(sensitivity_logs)
## [1] 0.9958824
median(sensitivity_logs)
## [1] 1
sd(sensitivity_logs)
## [1] 0.01508426
sensitivity_rf <- sensitivity_tcoxplus_train[5,]
#hist(sensitivity_rf)
mean(sensitivity_rf)
## [1] 0.9182353
median(sensitivity_rf)
## [1] 0.9411765
sd(sensitivity_rf)
## [1] 0.05411648
# specificity: mean, median and sd of rows 1-5 of specificity_tcoxplus_train.
# This reassigns specificity_trees, specificity_svm, specificity_svmR,
# specificity_logs and specificity_rf, replacing the values taken from
# specificity_enplus earlier in the file.
# NOTE(review): the "_train" suffix presumably marks training-set values;
# confirm where specificity_tcoxplus_train is filled.
specificity_trees <- specificity_tcoxplus_train[1,]
#hist(specificity_trees)
mean(specificity_trees)
## [1] 0.9742857
median(specificity_trees)
## [1] 1
sd(specificity_trees)
## [1] 0.02982653
specificity_svm <- specificity_tcoxplus_train[2,]
#hist(specificity_svm)
mean(specificity_svm)
## [1] 1
median(specificity_svm)
## [1] 1
sd(specificity_svm)
## [1] 0
specificity_svmR <- specificity_tcoxplus_train[3,]
#hist(specificity_svmR)
mean(specificity_svmR)
## [1] 0.9733333
median(specificity_svmR)
## [1] 1
sd(specificity_svmR)
## [1] 0.06779097
specificity_logs <- specificity_tcoxplus_train[4,]
#hist(specificity_logs)
mean(specificity_logs)
## [1] 0.9957143
median(specificity_logs)
## [1] 1
sd(specificity_logs)
## [1] 0.01369636
specificity_rf <- specificity_tcoxplus_train[5,]
#hist(specificity_rf)
mean(specificity_rf)
## [1] 1
median(specificity_rf)
## [1] 1
sd(specificity_rf)
## [1] 0
# fneg: mean, median and sd of rows 1-5 of fneg_tcoxplus_train. This reassigns
# fneg_trees, fneg_svm, fneg_svmR, fneg_logs and fneg_rf, replacing the values
# taken from fneg_enplus earlier in the file.
# NOTE(review): presumably the number of false negatives per run on the
# training data; confirm where fneg_tcoxplus_train is filled.
fneg_trees <- fneg_tcoxplus_train[1,]
#hist(fneg_trees)
mean(fneg_trees)
## [1] 0.39
median(fneg_trees)
## [1] 0
sd(fneg_trees)
## [1] 0.529818
fneg_svm <- fneg_tcoxplus_train[2,]
#hist(fneg_svm)
mean(fneg_svm)
## [1] 4.96
median(fneg_svm)
## [1] 5
sd(fneg_svm)
## [1] 1.043692
fneg_svmR <- fneg_tcoxplus_train[3,]
#hist(fneg_svmR)
mean(fneg_svmR)
## [1] 3.55
median(fneg_svmR)
## [1] 3.5
sd(fneg_svmR)
## [1] 2.491906
fneg_logs <- fneg_tcoxplus_train[4,]
#hist(fneg_logs)
mean(fneg_logs)
## [1] 0.07
median(fneg_logs)
## [1] 0
sd(fneg_logs)
## [1] 0.2564324
fneg_rf <- fneg_tcoxplus_train[5,]
#hist(fneg_rf)
mean(fneg_rf)
## [1] 1.39
median(fneg_rf)
## [1] 1
sd(fneg_rf)
## [1] 0.9199802
# acc: mean, median and sd of rows 1-5 of acc_tcoxplus. This reassigns
# acc_trees, acc_svm, acc_svmR, acc_logs and acc_rf, replacing the values
# taken from acc_tcoxplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of acc_tcoxplus_train;
# confirm where acc_tcoxplus is filled.
acc_trees <- acc_tcoxplus[1,]
#hist(acc_trees)
mean(acc_trees)
## [1] 0.6764706
median(acc_trees)
## [1] 0.7058824
sd(acc_trees)
## [1] 0.08546861
acc_svm <- acc_tcoxplus[2,]
#hist(acc_svm)
mean(acc_svm)
## [1] 0.7705882
median(acc_svm)
## [1] 0.7647059
sd(acc_svm)
## [1] 0.0894646
acc_svmR <- acc_tcoxplus[3,]
#hist(acc_svmR)
mean(acc_svmR)
## [1] 0.7241176
median(acc_svmR)
## [1] 0.7058824
sd(acc_svmR)
## [1] 0.0973936
acc_logs <- acc_tcoxplus[4,]
#hist(acc_logs)
mean(acc_logs)
## [1] 0.6611765
median(acc_logs)
## [1] 0.6470588
sd(acc_logs)
## [1] 0.09718703
acc_rf <- acc_tcoxplus[5,]
#hist(acc_rf)
mean(acc_rf)
## [1] 0.8394118
median(acc_rf)
## [1] 0.8235294
sd(acc_rf)
## [1] 0.05783785
# auc: mean, median and sd of rows 1-5 of auc_tcoxplus. This reassigns
# auc_trees, auc_svm, auc_svmR, auc_logs and auc_rf, replacing the values
# taken from auc_tcoxplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of auc_tcoxplus_train;
# confirm where auc_tcoxplus is filled.
auc_trees <- auc_tcoxplus[1,]
#hist(auc_trees)
mean(auc_trees)
## [1] 0.6742361
median(auc_trees)
## [1] 0.7013889
sd(auc_trees)
## [1] 0.08534957
auc_svm <- auc_tcoxplus[2,]
#hist(auc_svm)
mean(auc_svm)
## [1] 0.7588194
median(auc_svm)
## [1] 0.75
sd(auc_svm)
## [1] 0.09193715
auc_svmR <- auc_tcoxplus[3,]
#hist(auc_svmR)
mean(auc_svmR)
## [1] 0.7232639
median(auc_svmR)
## [1] 0.7152778
sd(auc_svmR)
## [1] 0.09580586
auc_logs <- auc_tcoxplus[4,]
#hist(auc_logs)
mean(auc_logs)
## [1] 0.6525
median(auc_logs)
## [1] 0.6388889
sd(auc_logs)
## [1] 0.09724186
auc_rf <- auc_tcoxplus[5,]
#hist(auc_rf)
mean(auc_rf)
## [1] 0.8302083
median(auc_rf)
## [1] 0.8125
sd(auc_rf)
## [1] 0.06119949
# miscl: mean, median and sd of rows 1-5 of miscl_tcoxplus. This reassigns
# miscl_trees, miscl_svm, miscl_svmR, miscl_logs and miscl_rf, replacing the
# values taken from miscl_tcoxplus_train earlier in the file.
# NOTE(review): presumably the number of misclassified held-out samples per
# run; confirm where miscl_tcoxplus is filled.
miscl_trees <- miscl_tcoxplus[1,]
#hist(miscl_trees)
mean(miscl_trees)
## [1] 5.5
median(miscl_trees)
## [1] 5
sd(miscl_trees)
## [1] 1.452966
miscl_svm <- miscl_tcoxplus[2,]
#hist(miscl_svm)
mean(miscl_svm)
## [1] 3.9
median(miscl_svm)
## [1] 4
sd(miscl_svm)
## [1] 1.520898
miscl_svmR <- miscl_tcoxplus[3,]
#hist(miscl_svmR)
mean(miscl_svmR)
## [1] 4.69
median(miscl_svmR)
## [1] 5
sd(miscl_svmR)
## [1] 1.655691
miscl_logs <- miscl_tcoxplus[4,]
#hist(miscl_logs)
mean(miscl_logs)
## [1] 5.76
median(miscl_logs)
## [1] 6
sd(miscl_logs)
## [1] 1.652179
miscl_rf <- miscl_tcoxplus[5,]
#hist(miscl_rf)
mean(miscl_rf)
## [1] 2.73
median(miscl_rf)
## [1] 3
sd(miscl_rf)
## [1] 0.9832434
# sensitivity: mean, median and sd of rows 1-5 of sensitivity_tcoxplus. This
# reassigns sensitivity_trees, sensitivity_svm, sensitivity_svmR,
# sensitivity_logs and sensitivity_rf, replacing the values taken from
# sensitivity_tcoxplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of
# sensitivity_tcoxplus_train; confirm where sensitivity_tcoxplus is filled.
sensitivity_trees <- sensitivity_tcoxplus[1,]
#hist(sensitivity_trees)
mean(sensitivity_trees)
## [1] 0.62625
median(sensitivity_trees)
## [1] 0.625
sd(sensitivity_trees)
## [1] 0.1680764
sensitivity_svm <- sensitivity_tcoxplus[2,]
#hist(sensitivity_svm)
mean(sensitivity_svm)
## [1] 0.55875
median(sensitivity_svm)
## [1] 0.625
sd(sensitivity_svm)
## [1] 0.1522713
sensitivity_svmR <- sensitivity_tcoxplus[3,]
#hist(sensitivity_svmR)
mean(sensitivity_svmR)
## [1] 0.70875
median(sensitivity_svmR)
## [1] 0.75
sd(sensitivity_svmR)
## [1] 0.1657694
sensitivity_logs <- sensitivity_tcoxplus[4,]
#hist(sensitivity_logs)
mean(sensitivity_logs)
## [1] 0.495
median(sensitivity_logs)
## [1] 0.5
sd(sensitivity_logs)
## [1] 0.1758098
sensitivity_rf <- sensitivity_tcoxplus[5,]
#hist(sensitivity_rf)
mean(sensitivity_rf)
## [1] 0.67375
median(sensitivity_rf)
## [1] 0.625
sd(sensitivity_rf)
## [1] 0.1254977
# specificity: mean, median and sd of rows 1-5 of specificity_tcoxplus. This
# reassigns specificity_trees, specificity_svm, specificity_svmR,
# specificity_logs and specificity_rf, replacing the values taken from
# specificity_tcoxplus_train earlier in the file.
# NOTE(review): presumably the held-out counterpart of
# specificity_tcoxplus_train; confirm where specificity_tcoxplus is filled.
specificity_trees <- specificity_tcoxplus[1,]
#hist(specificity_trees)
mean(specificity_trees)
## [1] 0.7211111
median(specificity_trees)
## [1] 0.7777778
sd(specificity_trees)
## [1] 0.1425674
specificity_svm <- specificity_tcoxplus[2,]
#hist(specificity_svm)
mean(specificity_svm)
## [1] 0.9588889
median(specificity_svm)
## [1] 1
sd(specificity_svm)
## [1] 0.08453861
specificity_svmR <- specificity_tcoxplus[3,]
#hist(specificity_svmR)
mean(specificity_svmR)
## [1] 0.7377778
median(specificity_svmR)
## [1] 0.7777778
sd(specificity_svmR)
## [1] 0.1878728
specificity_logs <- specificity_tcoxplus[4,]
#hist(specificity_logs)
mean(specificity_logs)
## [1] 0.8088889
median(specificity_logs)
## [1] 0.8888889
sd(specificity_logs)
## [1] 0.1507184
specificity_rf <- specificity_tcoxplus[5,]
#hist(specificity_rf)
mean(specificity_rf)
## [1] 0.9866667
median(specificity_rf)
## [1] 1
sd(specificity_rf)
## [1] 0.03957626
# fneg: mean, median and sd of rows 1-5 of fneg_tcoxplus. This reassigns
# fneg_trees, fneg_svm, fneg_svmR, fneg_logs and fneg_rf, replacing the values
# taken from fneg_tcoxplus_train earlier in the file.
# NOTE(review): presumably the number of false negatives among the held-out
# samples per run; confirm where fneg_tcoxplus is filled.
fneg_trees <- fneg_tcoxplus[1,]
#hist(fneg_trees)
mean(fneg_trees)
## [1] 2.99
median(fneg_trees)
## [1] 3
sd(fneg_trees)
## [1] 1.344611
fneg_svm <- fneg_tcoxplus[2,]
#hist(fneg_svm)
mean(fneg_svm)
## [1] 3.53
median(fneg_svm)
## [1] 3
sd(fneg_svm)
## [1] 1.218171
fneg_svmR <- fneg_tcoxplus[3,]
#hist(fneg_svmR)
mean(fneg_svmR)
## [1] 2.33
median(fneg_svmR)
## [1] 2
sd(fneg_svmR)
## [1] 1.326155
fneg_logs <- fneg_tcoxplus[4,]
#hist(fneg_logs)
mean(fneg_logs)
## [1] 4.04
median(fneg_logs)
## [1] 4
sd(fneg_logs)
## [1] 1.406479
fneg_rf <- fneg_tcoxplus[5,]
#hist(fneg_rf)
mean(fneg_rf)
## [1] 2.61
median(fneg_rf)
## [1] 3
sd(fneg_rf)
## [1] 1.003982
# save results
#save.image("~/results2_2010_final.RData")
# Long-format accuracy table for the classifier in row 1 (labelled "DT"):
# the row-1 values of acc, acc_enplus and acc_tcoxplus are stacked and tagged
# with the variant they came from ("DT", "EN + DT", "iTwiner + DT").
dt2 <- acc[1, ]
dt_en2 <- acc_enplus[1, ]
dt_iTwiner2 <- acc_tcoxplus[1, ]
#dt_hub <- acc_hubplus[1,]
acc_dt2 <- as.data.frame(c(dt2, dt_en2, dt_iTwiner2
#,dt_hub
))
colnames(acc_dt2) <- "acc"
# Build the group labels from the actual length of each vector. The previous
# fixed ranges (101:200, 201:300) mislabelled rows silently, or failed,
# whenever a variant did not hold exactly 100 values.
# To restore the hub variant, add "HUB + DT" / length(dt_hub) here and
# re-enable the dt_hub lines above.
acc_dt2$group <- ordered(
  rep(
    c("DT", "EN + DT", "iTwiner + DT"),
    times = c(length(dt2), length(dt_en2), length(dt_iTwiner2))
  ),
  levels = c("DT", "EN + DT", "iTwiner + DT")
)
acc_dt2$dataset <- "DATASET2"
# Long-format accuracy table for the classifier in row 2 (labelled "svmL"):
# the row-2 values of acc, acc_enplus and acc_tcoxplus are stacked and tagged
# with the variant they came from.
svmL2 <- acc[2, ]
svmL_en2 <- acc_enplus[2, ]
svmL_iTwiner2 <- acc_tcoxplus[2, ]
acc_svmL2 <- as.data.frame(c(svmL2, svmL_en2, svmL_iTwiner2
#,svmL_hub
))
colnames(acc_svmL2) <- "acc"
# Group labels follow the actual vector lengths instead of the fixed ranges
# 101:200 / 201:300, which were only right for exactly 100 values per variant.
acc_svmL2$group <- ordered(
  rep(
    c("svmL", "EN + svmL", "iTwiner + svmL"),
    times = c(length(svmL2), length(svmL_en2), length(svmL_iTwiner2))
  ),
  levels = c("svmL", "EN + svmL", "iTwiner + svmL")
)
acc_svmL2$dataset <- "DATASET2"
# Long-format accuracy table for the classifier in row 3 (labelled "svmR"):
# the row-3 values of acc, acc_enplus and acc_tcoxplus are stacked and tagged
# with the variant they came from.
svmR2 <- acc[3, ]
svmR_en2 <- acc_enplus[3, ]
svmR_iTwiner2 <- acc_tcoxplus[3, ]
acc_svmR2 <- as.data.frame(c(svmR2, svmR_en2, svmR_iTwiner2
#,svmR_hub
))
colnames(acc_svmR2) <- "acc"
# Group labels follow the actual vector lengths instead of the fixed ranges
# 101:200 / 201:300, which were only right for exactly 100 values per variant.
acc_svmR2$group <- ordered(
  rep(
    c("svmR", "EN + svmR", "iTwiner + svmR"),
    times = c(length(svmR2), length(svmR_en2), length(svmR_iTwiner2))
  ),
  levels = c("svmR", "EN + svmR", "iTwiner + svmR")
)
acc_svmR2$dataset <- "DATASET2"
# Long-format accuracy table for the classifier in row 4 (labelled "logist"):
# the row-4 values of acc, acc_enplus and acc_tcoxplus are stacked and tagged
# with the variant they came from.
logist2 <- acc[4, ]
logist_en2 <- acc_enplus[4, ]
logist_iTwiner2 <- acc_tcoxplus[4, ]
acc_logist2 <- as.data.frame(c(logist2, logist_en2, logist_iTwiner2
#,logist_hub
))
colnames(acc_logist2) <- "acc"
# Group labels follow the actual vector lengths instead of the fixed ranges
# 101:200 / 201:300, which were only right for exactly 100 values per variant.
acc_logist2$group <- ordered(
  rep(
    c("logist", "EN + logist", "iTwiner + logist"),
    times = c(length(logist2), length(logist_en2), length(logist_iTwiner2))
  ),
  levels = c("logist", "EN + logist", "iTwiner + logist")
)
acc_logist2$dataset <- "DATASET2"
# Long-format accuracy table for the classifier in row 5 (labelled "rf"):
# the row-5 values of acc, acc_enplus and acc_tcoxplus are stacked and tagged
# with the variant they came from.
rf2 <- acc[5, ]
rf_en2 <- acc_enplus[5, ]
rf_iTwiner2 <- acc_tcoxplus[5, ]
acc_rf2 <- as.data.frame(c(rf2, rf_en2, rf_iTwiner2
#,rf_hub
))
colnames(acc_rf2) <- "acc"
# Group labels follow the actual vector lengths instead of the fixed ranges
# 101:200 / 201:300, which were only right for exactly 100 values per variant.
acc_rf2$group <- ordered(
  rep(
    c("rf", "EN + rf", "iTwiner + rf"),
    times = c(length(rf2), length(rf_en2), length(rf_iTwiner2))
  ),
  levels = c("rf", "EN + rf", "iTwiner + rf")
)
acc_rf2$dataset <- "DATASET2"
# Restore a saved workspace; load() puts every object stored in the file into
# the current environment and overwrites objects of the same name.
# NOTE(review): the path is hard-coded under the user's home directory.
load("~/CRC_LCosta/results/results3_2010_final.RData")
# Point the generic "...1" names used by the code below at the "...3" objects
# (presumably restored by the load() above - confirm).
# NOTE(review): DATASET1_bal takes df3 while DATASET1 takes DATASET3_bal; the
# "_bal" suffixes look swapped - confirm this is intended.
DATASET1_bal <- df3
DATASET1 <- DATASET3_bal
# clinic1 and clinic1_bal (below) both take clinic3_bal.
clinic1 <- clinic3_bal
datasurv1 <- datasurv3
clinic1_bal <- clinic3_bal
# primM <- DATASET1 %>%
# filter(str_detect(class, "m"))
# rownames(primM) <- primM$ID
# primN <- DATASET1 %>%
# filter(!str_detect(class, "Pm"))
# rownames(primN) <- primN$ID
#
# clinic_prim <- DATASET1
#
# rnaprimM <- rnaseq1[rownames(rnaseq1) %in%
# rownames(primM),]
#
# rnaprimN <- rnaseq1[rownames(rnaseq1) %in%
# rownames(primN),]
#
# rnaprim <- rbind(rnaprimM,rnaprimN)
# xmet <- rnaprimM [,sapply(seq(ncol(rnaprimM)), function(ix) {sd(rnaprimM[,ix])}) != 0]
# xnon <- rnaprimN[,sapply(seq(ncol(rnaprimN)), function(ix) {sd(rnaprimN[,ix])}) != 0]
#
# xmet_less <- xmet[,which(colnames(xmet) %in% colnames(xnon))]
# xnon_less <- xnon[,which(colnames(xnon) %in% colnames(xmet))]
#
# # normalizing data
# xmet_norm <- scale(log2(xmet_less+1))
# xnon_norm <- scale(log2(xnon_less+1))
#
# xdataT <- rbind(xmet_less,xnon_less)
# xdataT <- xdataT[ order(row.names(xdataT)), ]
#
#
# rm(xmet,xmet_less,xnon,xnon_less,rnaprimM,rnaprimN)
# #xmet_cor <- Matrix(cor(xmet_norm), sparse = TRUE)
# xmet_cor <- cor(xmet_norm)
# #xmet_cor <- as.data.frame(xmet_cor)
# xnon_cor <- cor(xnon_norm)
# #xnon_cor <- as.data.frame(xnon_cor)
#
# # angular distance
# ang_weight <- vector()
# for (i in 1:dim(xmet_cor)[2]){
# ang_weight[i] <- acos(cosine(xmet_cor[,i],xnon_cor[,i]))/pi
# }
#
# ## normalized weights
#
# weights <- ang_weight / max(ang_weight)
# hist(weights,main="w")
#
#
# pen_weight3 <- 1 / weights
# hist(pen_weight3, main="1 / w")
#
# rm(xmet_cor,xnon_cor)
clinical <- as.data.frame(clinic1_bal)
# Quick exploratory summary of a clinical data frame.
#
# Prints the column overview (glimpse) and the per-variable frequency tables
# (freq), calls profiling_num() and plot_num() on the same data, and finishes
# with describe(), whose result is the function's value and is therefore
# printed when basic_eda() is called at top level.
#
# @param clinical data frame to summarise
# @return the describe() summary of `clinical`
basic_eda <- function(clinical) {
  glimpse(clinical)
  # df_Status(clinical)
  freq(clinical)
  # Not the last expression, so its result is discarded (nothing is printed).
  profiling_num(clinical)
  plot_num(clinical)
  # Describe the argument itself. This previously read the global `clinic1`,
  # so the last step ignored whatever data frame was passed in; the summary
  # title is now "clinical" rather than "clinic1".
  describe(clinical)
}
basic_eda(clinical)
## Rows: 55
## Columns: 6
## $ class <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", …
## $ organ <chr> "rectum", "colon", "colon", "colon", "rectum", "rectum", "co…
## $ Sex <chr> "m", "m", "m", "f", "f", "f", "m", "m", "f", "m", "f", "f", …
## $ Age <dbl> 77, 85, 74, 78, 69, 61, 66, 76, 59, 73, 62, 71, 61, 74, 77, …
## $ Stage <chr> "II", "II", "III", "II", "III", "II", "III", "II", "III", "I…
## $ sidedness <chr> "rectum", "right", "rectum", NA, "rectum", "rectum", "left",…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 Pm 30 54.55 54.55
## 2 P 25 45.45 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 47 85.45 85.45
## 2 rectum 8 14.55 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 30 54.55 54.55
## 2 m 25 45.45 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 III 31 56.36 56.36
## 2 II 24 43.64 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 right 24 43.64 43.64
## 2 left 17 30.91 74.55
## 3 rectum 10 18.18 92.73
## 4 <NA> 4 7.27 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinic1
##
## 6 Variables 55 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct
## 55 0 2
##
## Value P Pm
## Frequency 25 30
## Proportion 0.455 0.545
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 55 0 2
##
## Value colon rectum
## Frequency 47 8
## Proportion 0.855 0.145
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 55 0 2
##
## Value f m
## Frequency 30 25
## Proportion 0.545 0.455
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 48 7 26 0.997 69.5 13.24 53.0 56.7
## .25 .50 .75 .90 .95
## 61.0 71.5 78.0 85.0 85.0
##
## lowest : 37 39 53 56 57, highest: 78 81 85 86 87
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 55 0 2
##
## Value II III
## Frequency 24 31
## Proportion 0.436 0.564
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 51 4 3
##
## Value left rectum right
## Frequency 17 10 24
## Proportion 0.333 0.196 0.471
## --------------------------------------------------------------------------------
# Mean age over the patients with a recorded age.
a <- na.omit(clinical$Age)
mean(a)
## [1] 69.5
# Subset without the rows whose class matches "Pm".
clinical_p <- filter(clinic1_bal, str_detect(class, "Pm", negate = TRUE))
# Exploratory summary of a clinical data frame: column overview, frequency
# tables, numeric profiling and plots, then the describe() summary as the
# function's value.
basic_eda <- function(clinical_p) {
  glimpse(clinical_p)
  # df_Status(clinical_p)
  freq(clinical_p)
  profiling_num(clinical_p)
  plot_num(clinical_p)
  describe(clinical_p)
}
basic_eda(clinical_p)
## Rows: 25
## Columns: 6
## $ class <chr> "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", …
## $ organ <chr> "rectum", "colon", "colon", "colon", "rectum", "rectum", "co…
## $ Sex <chr> "m", "m", "m", "f", "f", "f", "m", "m", "f", "m", "f", "f", …
## $ Age <dbl> 77, 85, 74, 78, 69, 61, 66, 76, 59, 73, 62, 71, 61, 74, 77, …
## $ Stage <chr> "II", "II", "III", "II", "III", "II", "III", "II", "III", "I…
## $ sidedness <chr> "rectum", "right", "rectum", NA, "rectum", "rectum", "left",…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 P 25 100 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 22 88 88
## 2 rectum 3 12 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 13 52 52
## 2 m 12 48 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 II 14 56 56
## 2 III 11 44 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 right 15 60 60
## 2 rectum 5 20 80
## 3 left 4 16 96
## 4 <NA> 1 4 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinical_p
##
## 6 Variables 25 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct value
## 25 0 1 P
##
## Value P
## Frequency 25
## Proportion 1
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 25 0 2
##
## Value colon rectum
## Frequency 22 3
## Proportion 0.88 0.12
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 25 0 2
##
## Value f m
## Frequency 13 12
## Proportion 0.52 0.48
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 22 3 16 0.995 70.36 12.78 53.30 59.20
## .25 .50 .75 .90 .95
## 62.50 73.50 77.75 84.60 85.00
##
## lowest : 39 53 59 61 62, highest: 76 77 78 81 85
##
## Value 39 53 59 61 62 64 66 69 71 73 74
## Frequency 1 1 1 2 1 1 1 1 1 1 2
## Proportion 0.045 0.045 0.045 0.091 0.045 0.045 0.045 0.045 0.045 0.045 0.091
##
## Value 76 77 78 81 85
## Frequency 1 2 2 1 3
## Proportion 0.045 0.091 0.091 0.045 0.136
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 25 0 2
##
## Value II III
## Frequency 14 11
## Proportion 0.56 0.44
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 24 1 3
##
## Value left rectum right
## Frequency 4 5 15
## Proportion 0.167 0.208 0.625
## --------------------------------------------------------------------------------
# Mean of the recorded ages in the `clinical_p` subset (missing ages dropped).
a <- na.omit(clinical_p[["Age"]])
mean(x = a)
## [1] 70.36364
# Subset of the balanced clinical table whose `class` matches "Pm".
clinical_Pm <- clinic1_bal %>%
  filter(str_detect(class, "Pm"))

# Quick exploratory summary of a clinical table.
#
# Runs, in order: a structural glimpse, frequency tables for the categorical
# columns, numeric profiling, histograms of the numeric columns and a
# Hmisc-style description.
#
# clinical_Pm: data frame to summarise.
# Returns the result of describe(), which is auto-printed when the function is
# called at top level.
basic_eda <- function(clinical_Pm) {
  glimpse(clinical_Pm)
  # df_Status(clinical_Pm)  # disabled in the original analysis
  freq(clinical_Pm)
  # profiling_num() only returns a table; inside a function it has to be
  # printed explicitly, otherwise the result is silently thrown away.
  print(profiling_num(clinical_Pm))
  plot_num(clinical_Pm)
  describe(clinical_Pm)
}
basic_eda(clinical_Pm)
## Rows: 30
## Columns: 6
## $ class <chr> "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", "Pm", …
## $ organ <chr> "rectum", "colon", "rectum", "colon", "colon", "colon", "rec…
## $ Sex <chr> "f", "f", "f", "m", "f", "f", "m", "f", "f", "m", "m", "f", …
## $ Age <dbl> 58, 85, 62, 67, 75, 69, 58, 57, 72, 65, 61, 78, 81, 57, 74, …
## $ Stage <chr> "II", "II", "III", "III", "III", "II", "II", "III", "II", "I…
## $ sidedness <chr> "left", "right", "rectum", "rectum", "left", "left", "rectum…
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## class frequency percentage cumulative_perc
## 1 Pm 30 100 100
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## organ frequency percentage cumulative_perc
## 1 colon 25 83.33 83.33
## 2 rectum 5 16.67 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Sex frequency percentage cumulative_perc
## 1 f 17 56.67 56.67
## 2 m 13 43.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Stage frequency percentage cumulative_perc
## 1 III 20 66.67 66.67
## 2 II 10 33.33 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## sidedness frequency percentage cumulative_perc
## 1 left 13 43.33 43.33
## 2 right 9 30.00 73.33
## 3 rectum 5 16.67 90.00
## 4 <NA> 3 10.00 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## clinical_Pm
##
## 6 Variables 30 Observations
## --------------------------------------------------------------------------------
## class
## n missing distinct value
## 30 0 1 Pm
##
## Value Pm
## Frequency 30
## Proportion 1
## --------------------------------------------------------------------------------
## organ
## n missing distinct
## 30 0 2
##
## Value colon rectum
## Frequency 25 5
## Proportion 0.833 0.167
## --------------------------------------------------------------------------------
## Sex
## n missing distinct
## 30 0 2
##
## Value f m
## Frequency 17 13
## Proportion 0.567 0.433
## --------------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 26 4 18 0.997 68.77 13.91 53.75 56.50
## .25 .50 .75 .90 .95
## 58.75 69.00 77.25 85.00 85.75
##
## lowest : 37 53 56 57 58, highest: 78 81 85 86 87
##
## Value 37 53 56 57 58 61 62 65 67 69 72
## Frequency 1 1 1 2 2 1 1 1 1 3 2
## Proportion 0.038 0.038 0.038 0.077 0.077 0.038 0.038 0.038 0.038 0.115 0.077
##
## Value 74 75 78 81 85 86 87
## Frequency 2 1 1 2 2 1 1
## Proportion 0.077 0.038 0.038 0.077 0.077 0.038 0.038
## --------------------------------------------------------------------------------
## Stage
## n missing distinct
## 30 0 2
##
## Value II III
## Frequency 10 20
## Proportion 0.333 0.667
## --------------------------------------------------------------------------------
## sidedness
## n missing distinct
## 27 3 3
##
## Value left rectum right
## Frequency 13 5 9
## Proportion 0.481 0.185 0.333
## --------------------------------------------------------------------------------
# Mean of the recorded ages in the `clinical_Pm` subset (missing ages dropped).
a <- na.omit(clinical_Pm[["Age"]])
mean(x = a)
## [1] 68.76923
# Copy of the balanced clinical table with every character column converted to
# a factor, so the plots and tests below treat them as categorical.
# across(where(...)) replaces the superseded mutate_if() + sapply() predicate.
clinical_factor <- clinic1_bal %>%
  mutate(across(where(is.character), as.factor))
# clinical_factor <- na.omit(clinical_factor)

# Proportion of each organ within each class.
ggplot(data = clinical_factor) +
  geom_bar(mapping = aes(x = class, fill = organ), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))

# Proportion of each sex within each class.
ggplot(data = clinical_factor) +
  geom_bar(mapping = aes(x = class, fill = Sex), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))

# Sidedness is plotted without the "rectum" rows; filter() also drops the rows
# whose sidedness is missing, because the condition evaluates to NA for them.
clinical_factor1 <- clinical_factor %>%
  filter(!str_detect(sidedness, "rectum"))
ggplot(data = clinical_factor1) +
  geom_bar(mapping = aes(x = class, fill = sidedness), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4"))

# Proportion of each stage within each class.
ggplot(data = clinical_factor) +
  geom_bar(mapping = aes(x = class, fill = Stage), position = "fill") +
  scale_fill_manual(values = c("#ADD8E6", "#4682B4", "#000080"))
# Overlaid age histograms, one colour per class.
# labs(fill = "") must be chained with `+`; as a separate statement it was only
# evaluated and printed as a stray object, and the legend title stayed unchanged.
clinical_factor %>%
  ggplot(aes(x = Age, fill = class)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  labs(fill = "")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 7 rows containing non-finite values (stat_bin).
# Class x organ contingency table and Fisher's exact test on it.
stat_data_organ <- table(clinical_factor[["class"]], clinical_factor[["organ"]])
# TODO: make a plot(...)
fisher.test(x = stat_data_organ)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_organ
## p-value = 0.7153
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2495643 10.4647177
## sample estimates:
## odds ratio
## 1.456644
# Class x sex contingency table and Fisher's exact test on it.
stat_data_sex <- table(clinical_factor[["class"]], clinical_factor[["Sex"]])
fisher.test(x = stat_data_sex)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_sex
## p-value = 0.7899
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2504095 2.7417911
## sample estimates:
## odds ratio
## 0.8312827
# Class x stage contingency table and Fisher's exact test on it.
stat_data_stage <- table(clinical_factor[["class"]], clinical_factor[["Stage"]])
fisher.test(x = stat_data_stage)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_stage
## p-value = 0.109
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.7476062 8.7792316
## sample estimates:
## odds ratio
## 2.500865
# Class x sidedness contingency table and Fisher's exact test on it.
stat_data_side <- table(
  clinical_factor[["class"]],
  clinical_factor[["sidedness"]]
)
fisher.test(x = stat_data_side)
##
## Fisher's Exact Test for Count Data
##
## data: stat_data_side
## p-value = 0.04474
## alternative hypothesis: two.sided
# Age distribution within each class: one histogram per class, a per-class
# summary, then a two-sample t-test of Age between the classes.
hist(clinical_factor$Age[clinical_factor$class == "P"])
hist(clinical_factor$Age[clinical_factor$class == "Pm"])
tapply(
  X = clinical_factor$Age,
  INDEX = clinical_factor$class,
  FUN = summary
)
## $P
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 39.00 62.50 73.50 70.36 77.75 85.00 3
##
## $Pm
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 37.00 58.75 69.00 68.77 77.25 87.00 4
t.test(Age ~ class, data = clinical_factor)
##
## Welch Two Sample t-test
##
## data: Age by class
## t = 0.46709, df = 45.47, p-value = 0.6427
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.278678 8.467489
## sample estimates:
## mean in group P mean in group Pm
## 70.36364 68.76923
# Join the survival table with the clinical table on row names, then fit
# survival curves stratified by Stage.
data <- merge(x = datasurv1, y = clinical, by = "row.names")
fit <- survfit(formula = Surv(time, Status) ~ Stage, data = data)
print(x = fit)
## Call: survfit(formula = Surv(time, Status) ~ Stage, data = data)
##
## n events median 0.95LCL 0.95UCL
## Stage=II 24 9 3728 1816 NA
## Stage=III 31 16 1682 807 NA
# Event-by-event survival table for each Stage group
summary(object = fit)
## Call: survfit(formula = Surv(time, Status) ~ Stage, data = data)
##
## Stage=II
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 24 1 0.958 0.0408 0.882 1.000
## 400 22 1 0.915 0.0577 0.808 1.000
## 467 21 1 0.871 0.0695 0.745 1.000
## 1357 17 1 0.820 0.0821 0.674 0.998
## 1428 16 1 0.769 0.0916 0.609 0.971
## 1635 15 1 0.717 0.0988 0.548 0.940
## 1816 14 1 0.666 0.1042 0.490 0.905
## 2011 11 1 0.606 0.1109 0.423 0.867
## 3728 5 1 0.485 0.1400 0.275 0.854
##
## Stage=III
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 216 30 1 0.967 0.0328 0.905 1.000
## 386 29 1 0.933 0.0455 0.848 1.000
## 420 28 1 0.900 0.0548 0.799 1.000
## 500 27 1 0.867 0.0621 0.753 0.997
## 518 26 1 0.833 0.0680 0.710 0.978
## 520 25 1 0.800 0.0730 0.669 0.957
## 573 24 1 0.767 0.0772 0.629 0.934
## 632 23 1 0.733 0.0807 0.591 0.910
## 682 22 1 0.700 0.0837 0.554 0.885
## 797 21 1 0.667 0.0861 0.518 0.859
## 807 20 1 0.633 0.0880 0.482 0.832
## 857 19 1 0.600 0.0894 0.448 0.804
## 1107 15 1 0.560 0.0920 0.406 0.773
## 1157 12 1 0.513 0.0954 0.357 0.739
## 1682 10 1 0.462 0.0987 0.304 0.702
## 1835 9 1 0.411 0.1002 0.255 0.663
# Short per-group summary table (records, events, restricted mean, median)
summary(fit)[["table"]]
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## Stage=II 24 24 24 9 3135.733 361.0049 3728 1816
## Stage=III 31 31 31 16 2371.863 361.9438 1682 807
## 0.95UCL
## Stage=II NA
## Stage=III NA
# Survival curves by Stage, with the test p-value shown on the plot
ggsurvplot(fit = fit, data = data, pval = TRUE)
# Log-rank test between the Stage groups
surv.stage <- survdiff(formula = Surv(time, Status) ~ Stage, data = data)
print(surv.stage)
## Call:
## survdiff(formula = Surv(time, Status) ~ Stage, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Stage=II 24 9 12.8 1.12 2.36
## Stage=III 31 16 12.2 1.17 2.36
##
## Chisq= 2.4 on 1 degrees of freedom, p= 0.1
# Survival curves stratified by class (P vs Pm).
fit <- survfit(formula = Surv(time, Status) ~ class, data = data)
print(x = fit)
## Call: survfit(formula = Surv(time, Status) ~ class, data = data)
##
## n events median 0.95LCL 0.95UCL
## class=P 25 3 NA 3728 NA
## class=Pm 30 22 1107 682 2011
# Event-by-event survival table for each class
summary(object = fit)
## Call: survfit(formula = Surv(time, Status) ~ class, data = data)
##
## class=P
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 467 23 1 0.957 0.0425 0.877 1
## 1816 14 1 0.888 0.0768 0.750 1
## 3728 5 1 0.711 0.1703 0.444 1
##
## class=Pm
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 30 1 0.967 0.0328 0.905 1.000
## 216 29 1 0.933 0.0455 0.848 1.000
## 386 28 1 0.900 0.0548 0.799 1.000
## 400 27 1 0.867 0.0621 0.753 0.997
## 420 26 1 0.833 0.0680 0.710 0.978
## 500 25 1 0.800 0.0730 0.669 0.957
## 518 24 1 0.767 0.0772 0.629 0.934
## 520 23 1 0.733 0.0807 0.591 0.910
## 573 22 1 0.700 0.0837 0.554 0.885
## 632 21 1 0.667 0.0861 0.518 0.859
## 682 20 1 0.633 0.0880 0.482 0.832
## 797 19 1 0.600 0.0894 0.448 0.804
## 807 18 1 0.567 0.0905 0.414 0.775
## 857 17 1 0.533 0.0911 0.382 0.745
## 1107 15 1 0.498 0.0917 0.347 0.714
## 1157 14 1 0.462 0.0918 0.313 0.682
## 1357 13 1 0.427 0.0913 0.280 0.649
## 1428 12 1 0.391 0.0904 0.249 0.615
## 1635 11 1 0.356 0.0889 0.218 0.580
## 1682 10 1 0.320 0.0868 0.188 0.545
## 1835 9 1 0.284 0.0841 0.159 0.508
## 2011 7 1 0.244 0.0813 0.127 0.469
# Short per-group summary table (records, events, restricted mean, median)
summary(fit)[["table"]]
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## class=P 25 25 25 3 4029.716 264.4579 NA 3728
## class=Pm 30 30 30 22 1799.706 302.9623 1107 682
## 0.95UCL
## class=P NA
## class=Pm 2011
# Survival curves by class, with the test p-value shown on the plot
ggsurvplot(fit = fit, data = data, pval = TRUE)
# Log-rank test between the classes (the result keeps the name `surv.stage`)
surv.stage <- survdiff(formula = Surv(time, Status) ~ class, data = data)
print(surv.stage)
## Call:
## survdiff(formula = Surv(time, Status) ~ class, data = data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## class=P 25 3 13.1 7.82 16.7
## class=Pm 30 22 11.9 8.66 16.7
##
## Chisq= 16.7 on 1 degrees of freedom, p= 4e-05
# Drop the "rectum" rows so that only the remaining sidedness groups are compared.
# A logical mask replaces `data[-which(cond), ]`: when no row matches, which()
# returns integer(0) and the negative index would select zero rows instead of
# all of them. Rows with missing sidedness are kept, exactly as before; survfit
# reports them as deleted due to missingness.
newdata <- data[is.na(data$sidedness) | data$sidedness != "rectum", ]
fit <- survfit(Surv(time, Status) ~ sidedness, data = newdata)
print(fit)
## Call: survfit(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## 4 observations deleted due to missingness
## n events median 0.95LCL 0.95UCL
## sidedness=left 17 9 1428 682 NA
## sidedness=right 24 9 2011 1635 NA
# Event-by-event survival table for each sidedness group
summary(object = fit)
## Call: survfit(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## 4 observations deleted due to missingness
## sidedness=left
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 400 16 1 0.938 0.0605 0.826 1.000
## 467 15 1 0.875 0.0827 0.727 1.000
## 520 14 1 0.812 0.0976 0.642 1.000
## 632 13 1 0.750 0.1083 0.565 0.995
## 682 12 1 0.688 0.1159 0.494 0.957
## 807 11 1 0.625 0.1210 0.428 0.914
## 1107 9 1 0.556 0.1259 0.356 0.866
## 1428 8 1 0.486 0.1279 0.290 0.814
## 1835 7 1 0.417 0.1271 0.229 0.758
##
## sidedness=right
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 173 24 1 0.958 0.0408 0.882 1.000
## 420 22 1 0.915 0.0577 0.808 1.000
## 500 21 1 0.871 0.0695 0.745 1.000
## 797 19 1 0.825 0.0795 0.683 0.997
## 857 18 1 0.780 0.0873 0.626 0.971
## 1157 11 1 0.709 0.1042 0.531 0.945
## 1635 9 1 0.630 0.1187 0.435 0.911
## 1816 8 1 0.551 0.1273 0.350 0.867
## 2011 5 1 0.441 0.1418 0.235 0.828
# Short per-group summary table (records, events, restricted mean, median)
summary(fit)[["table"]]
## records n.max n.start events *rmean *se(rmean) median 0.95LCL
## sidedness=left 17 17 17 9 2412.722 467.4406 1428 682
## sidedness=right 24 24 24 9 2698.588 443.2539 2011 1635
## 0.95UCL
## sidedness=left NA
## sidedness=right NA
# Survival curves by sidedness, with the test p-value shown on the plot
ggsurvplot(fit = fit, data = newdata, pval = TRUE)
# Log-rank test between the sidedness groups (result keeps the name `surv.stage`)
surv.stage <- survdiff(formula = Surv(time, Status) ~ sidedness, data = newdata)
print(surv.stage)
## Call:
## survdiff(formula = Surv(time, Status) ~ sidedness, data = newdata)
##
## n=41, 4 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sidedness=left 17 9 7.53 0.289 0.5
## sidedness=right 24 9 10.47 0.207 0.5
##
## Chisq= 0.5 on 1 degrees of freedom, p= 0.5
# Differential expression between the classes with edgeR.
# First align the expression table (one row per sample) with the balanced
# clinical table: sort both by row name and keep only the shared sample IDs.
xdata <- rnaseq1[order(row.names(rnaseq1)), ]
rownames(DATASET1_bal) <- DATASET1_bal$ID
ydata <- as.data.frame(DATASET1_bal[order(row.names(DATASET1_bal)), ])
rownames(ydata) <- ydata$ID
xdata <- xdata[rownames(xdata) %in% rownames(ydata), ]
ydata <- as.data.frame(ydata[rownames(ydata) %in% rownames(xdata), ])
# keep features with standard deviation > 0
# (seq_len() is safe for zero columns; vapply() guarantees a numeric vector)
xdata <- xdata[
  ,
  vapply(
    seq_len(ncol(xdata)),
    function(ix) sd(xdata[, ix]),
    numeric(1)
  ) != 0
]
# DGEList() expects features in rows and samples in columns
xdata <- t(xdata)
group <- as.factor(ydata$class)
class <- as.data.frame(ydata$class)
edgeR.DGElist <- DGEList(counts = xdata, group = group)
# remove genes that do not have one count per million in at least 5 samples
keep <- rowSums(cpm(edgeR.DGElist) >= 1) >= 5
edgeR.DGElist <- edgeR.DGElist[keep, ]
# specify the design setup
design <- model.matrix(~group)
# estimate the dispersion for all read counts across all samples
edgeR.DGElist <- estimateDisp(edgeR.DGElist, design)
# fit the negative binomial model
edger_fit <- glmFit(edgeR.DGElist, design)
# perform the testing for every gene using the neg. binomial model
edger_lrt <- glmLRT(edger_fit)
summary(decideTests(edger_lrt))
## groupPm
## Down 99
## NotSig 20096
## Up 39
# Pull every gene from the LRT result, ranked by p-value, with
# Benjamini-Hochberg adjusted p-values added.
DGE.results_edgeR <- topTags(
  edger_lrt,
  n = Inf,
  sort.by = "PValue",
  adjust.method = "BH"
)
topTags(DGE.results_edgeR) # table with the top 10 DEGs
## Coefficient: groupPm
## logFC logCPM LR PValue FDR
## SPOCK2 -3.402797 3.69918001 38.67871 4.996291e-10 1.010949e-05
## LGR6 -2.025917 2.03265558 32.84611 9.975057e-09 8.069582e-05
## ORM1 5.389676 1.39204222 32.23599 1.365384e-08 8.069582e-05
## H2BS1 6.950155 -2.01960359 31.93370 1.595252e-08 8.069582e-05
## MTRNR2L12 -3.810406 4.36430059 30.37005 3.569974e-08 1.444697e-04
## NELL2 -3.087534 1.88688828 29.37419 5.966713e-08 1.780755e-04
## CXCL11 -3.012100 2.06930721 29.31223 6.160565e-08 1.780755e-04
## IDO1 -2.451621 2.14732736 28.91556 7.560303e-08 1.912190e-04
## SLCO1B1 -4.505595 0.04310525 27.70892 1.410109e-07 3.170239e-04
## SLCO1B7 -3.255436 1.08014684 27.13707 1.895286e-07 3.834922e-04
# Keep the genes with FDR < 0.05 and export them.
genes_deg <- DGE.results_edgeR$table
genes_deg <- genes_deg[which(genes_deg$FDR < 0.05), ]
dim(genes_deg) # genes found to be differentially expressed
## [1] 138 5
genes_deg$row <- row.names(genes_deg)
write_xlsx(genes_deg, "genes_deg_d3.xlsx")
# head() returns at most 100 names. Indexing with [1:100, ] past the last row
# pads the result with NA rows, whose row names become "NA", "NA.1", ...
top100_deg <- head(rownames(genes_deg), 100)
# highly expressed genes
high <- genes_deg[genes_deg$logFC > 0, ]
dim(high)
## [1] 39 6
top100_high <- head(rownames(high), 100)
# low expressed genes
low <- genes_deg[genes_deg$logFC < 0, ]
dim(low)
## [1] 99 6
top100_low <- head(rownames(low), 100)
# Build the expression table and the 0/1 class-label table for classification.
xdata.raw <- xdataT
# keep features with standard deviation > 0
# (seq_len() is safe for zero columns; vapply() guarantees a numeric vector)
xdata <- xdata.raw[
  ,
  vapply(
    seq_len(ncol(xdata.raw)),
    function(ix) sd(xdata.raw[, ix]),
    numeric(1)
  ) != 0
]
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
# ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the labels: "P" -> 0, "Pm" -> 1 (converted to numeric further down).
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep only the samples present in both tables, then sort both by row name.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
ydata$`clinic1$class` <- as.numeric(ydata$`clinic1$class`)
Five classifiers were used: decision trees, linear and radial support vector machines, logistic regression, and random forest.
# Feature table for the classifiers: the first 50 differentially expressed
# genes, renamed Var1..Var50, plus the class label as a factor column `type`.
# Rows are selected by rownames(ydata) so the samples are in the same order as
# the labels; ydata was sorted by row name, while xdataT keeps its own order.
xdata <- xdataT[rownames(ydata), top100_deg[1:50]]
# original gene names, kept so Var1..Var50 can be mapped back later
nomesgenes <- colnames(xdata)
colnames(xdata) <- paste0("Var", seq_along(nomesgenes))
colnames(ydata) <- c("class", "row")
xdata$type <- as.factor(ydata$class)
# xdata <- xdata[colMeans(xdata == 0) <= 0.6] # delete genes that have null values in at least 60% of the samples
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# acc
# Summary (histogram, mean, median, sd) of each row of `acc_train`
# (presumably training-set accuracy per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
acc_trees <- acc_train[1,]
hist(acc_trees)
mean(acc_trees)
## [1] 0.9915789
median(acc_trees)
## [1] 1
sd(acc_trees)
## [1] 0.01393505
acc_svm <- acc_train[2,]
hist(acc_svm)
mean(acc_svm)
## [1] 0.8239474
median(acc_svm)
## [1] 0.8157895
sd(acc_svm)
## [1] 0.03339099
acc_svmR <- acc_train[3,]
hist(acc_svmR)
mean(acc_svmR)
## [1] 0.8431579
median(acc_svmR)
## [1] 0.8947368
sd(acc_svmR)
## [1] 0.1519306
acc_logs <- acc_train[4,]
hist(acc_logs)
mean(acc_logs)
## [1] 1
median(acc_logs)
## [1] 1
sd(acc_logs)
## [1] 0
acc_rf <- acc_train[5,]
hist(acc_rf)
mean(acc_rf)
## [1] 1
median(acc_rf)
## [1] 1
sd(acc_rf)
## [1] 0
# auc
# Mean, median and sd of each row of `auc_train`
# (presumably training-set AUC per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
auc_trees <- auc_train[1,]
mean(auc_trees)
## [1] 0.9918768
median(auc_trees)
## [1] 1
sd(auc_trees)
## [1] 0.01364299
auc_svm <- auc_train[2,]
mean(auc_svm)
## [1] 0.8077171
median(auc_svm)
## [1] 0.7941176
sd(auc_svm)
## [1] 0.04325313
auc_svmR <- auc_train[3,]
mean(auc_svmR)
## [1] 0.8316527
median(auc_svmR)
## [1] 0.8991597
sd(auc_svmR)
## [1] 0.1722223
auc_logs <- auc_train[4,]
mean(auc_logs)
## [1] 1
median(auc_logs)
## [1] 1
sd(auc_logs)
## [1] 0
auc_rf <- auc_train[5,]
mean(auc_rf)
## [1] 1
median(auc_rf)
## [1] 1
sd(auc_rf)
## [1] 0
# miscl
# Mean, median and sd of each row of `miscl_train`
# (presumably the number of misclassified training samples per run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
miscl_trees <- miscl_train[1,]
mean(miscl_trees)
## [1] 0.32
median(miscl_trees)
## [1] 0
sd(miscl_trees)
## [1] 0.5295319
miscl_svm <- miscl_train[2,]
mean(miscl_svm)
## [1] 6.69
median(miscl_svm)
## [1] 7
sd(miscl_svm)
## [1] 1.268858
miscl_svmR <- miscl_train[3,]
mean(miscl_svmR)
## [1] 5.96
median(miscl_svmR)
## [1] 4
sd(miscl_svmR)
## [1] 5.773363
miscl_logs <- miscl_train[4,]
mean(miscl_logs)
## [1] 0
median(miscl_logs)
## [1] 0
sd(miscl_logs)
## [1] 0
miscl_rf <- miscl_train[5,]
mean(miscl_rf)
## [1] 0
median(miscl_rf)
## [1] 0
sd(miscl_rf)
## [1] 0
# sensitivity
# Mean, median and sd of each row of `sensitivity_train`
# (presumably training-set sensitivity per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
sensitivity_trees <- sensitivity_train[1,]
mean(sensitivity_trees)
## [1] 0.9947059
median(sensitivity_trees)
## [1] 1
sd(sensitivity_trees)
## [1] 0.01691903
sensitivity_svm <- sensitivity_train[2,]
mean(sensitivity_svm)
## [1] 0.6535294
median(sensitivity_svm)
## [1] 0.5882353
sd(sensitivity_svm)
## [1] 0.1579497
sensitivity_svmR <- sensitivity_train[3,]
mean(sensitivity_svmR)
## [1] 0.7223529
median(sensitivity_svmR)
## [1] 0.9411765
sd(sensitivity_svmR)
## [1] 0.3760582
sensitivity_logs <- sensitivity_train[4,]
mean(sensitivity_logs)
## [1] 1
median(sensitivity_logs)
## [1] 1
sd(sensitivity_logs)
## [1] 0
sensitivity_rf <- sensitivity_train[5,]
mean(sensitivity_rf)
## [1] 1
median(sensitivity_rf)
## [1] 1
sd(sensitivity_rf)
## [1] 0
# specificity
# Mean, median and sd of each row of `specificity_train`
# (presumably training-set specificity per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
specificity_trees <- specificity_train[1,]
mean(specificity_trees)
## [1] 0.9890476
median(specificity_trees)
## [1] 1
sd(specificity_trees)
## [1] 0.0201406
specificity_svm <- specificity_train[2,]
mean(specificity_svm)
## [1] 0.9619048
median(specificity_svm)
## [1] 1
sd(specificity_svm)
## [1] 0.08694009
specificity_svmR <- specificity_train[3,]
mean(specificity_svmR)
## [1] 0.9409524
median(specificity_svmR)
## [1] 1
sd(specificity_svmR)
## [1] 0.08402482
specificity_logs <- specificity_train[4,]
mean(specificity_logs)
## [1] 1
median(specificity_logs)
## [1] 1
sd(specificity_logs)
## [1] 0
specificity_rf <- specificity_train[5,]
mean(specificity_rf)
## [1] 1
median(specificity_rf)
## [1] 1
sd(specificity_rf)
## [1] 0
# fneg
# Mean, median and sd of each row of `fneg_train`
# (presumably the number of false negatives on the training set per run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
fneg_trees <- fneg_train[1,]
mean(fneg_trees)
## [1] 0.09
median(fneg_trees)
## [1] 0
sd(fneg_trees)
## [1] 0.2876235
fneg_svm <- fneg_train[2,]
mean(fneg_svm)
## [1] 5.89
median(fneg_svm)
## [1] 7
sd(fneg_svm)
## [1] 2.685144
fneg_svmR <- fneg_train[3,]
mean(fneg_svmR)
## [1] 4.72
median(fneg_svmR)
## [1] 1
sd(fneg_svmR)
## [1] 6.392989
fneg_logs <- fneg_train[4,]
mean(fneg_logs)
## [1] 0
median(fneg_logs)
## [1] 0
sd(fneg_logs)
## [1] 0
fneg_rf <- fneg_train[5,]
mean(fneg_rf)
## [1] 0
median(fneg_rf)
## [1] 0
sd(fneg_rf)
## [1] 0
# acc
# Summary (histogram, mean, median, sd) of each row of `acc`
# (presumably held-out/test accuracy per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# NOTE: the acc_* names are also assigned from `acc_train` earlier in the file;
# these assignments overwrite those values.
acc_trees <- acc[1,]
hist(acc_trees)
mean(acc_trees)
## [1] 0.6
median(acc_trees)
## [1] 0.5882353
sd(acc_trees)
## [1] 0.1010241
acc_svm <- acc[2,]
hist(acc_svm)
mean(acc_svm)
## [1] 0.6864706
median(acc_svm)
## [1] 0.7058824
sd(acc_svm)
## [1] 0.08321214
acc_svmR <- acc[3,]
hist(acc_svmR)
mean(acc_svmR)
## [1] 0.5411765
median(acc_svmR)
## [1] 0.5294118
sd(acc_svmR)
## [1] 0.09043601
acc_logs <- acc[4,]
hist(acc_logs)
mean(acc_logs)
## [1] 0.6652941
median(acc_logs)
## [1] 0.6470588
sd(acc_logs)
## [1] 0.1049923
acc_rf <- acc[5,]
hist(acc_rf)
mean(acc_rf)
## [1] 0.6805882
median(acc_rf)
## [1] 0.7058824
sd(acc_rf)
## [1] 0.1018013
# auc
# Mean, median and sd of each row of `auc`
# (presumably held-out/test AUC per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# NOTE: the auc_* names are also assigned from `auc_train` earlier in the file;
# these assignments overwrite those values.
auc_trees <- auc[1,]
mean(auc_trees)
## [1] 0.6041667
median(auc_trees)
## [1] 0.5868056
sd(auc_trees)
## [1] 0.09547798
auc_svm <- auc[2,]
mean(auc_svm)
## [1] 0.6746528
median(auc_svm)
## [1] 0.6875
sd(auc_svm)
## [1] 0.08828012
auc_svmR <- auc[3,]
mean(auc_svmR)
## [1] 0.5403472
median(auc_svmR)
## [1] 0.5138889
sd(auc_svmR)
## [1] 0.084279
auc_logs <- auc[4,]
mean(auc_logs)
## [1] 0.6639583
median(auc_logs)
## [1] 0.6458333
sd(auc_logs)
## [1] 0.1014728
auc_rf <- auc[5,]
mean(auc_rf)
## [1] 0.6722222
median(auc_rf)
## [1] 0.6944444
sd(auc_rf)
## [1] 0.1026285
# miscl
# Mean, median and sd of each row of `miscl`
# (presumably the number of misclassified held-out samples per run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# NOTE: the miscl_* names are also assigned from `miscl_train` earlier in the
# file; these assignments overwrite those values.
miscl_trees <- miscl[1,]
mean(miscl_trees)
## [1] 6.8
median(miscl_trees)
## [1] 7
sd(miscl_trees)
## [1] 1.717409
miscl_svm <- miscl[2,]
mean(miscl_svm)
## [1] 5.33
median(miscl_svm)
## [1] 5
sd(miscl_svm)
## [1] 1.414606
miscl_svmR <- miscl[3,]
mean(miscl_svmR)
## [1] 7.8
median(miscl_svmR)
## [1] 8
sd(miscl_svmR)
## [1] 1.537412
miscl_logs <- miscl[4,]
mean(miscl_logs)
## [1] 5.69
median(miscl_logs)
## [1] 6
sd(miscl_logs)
## [1] 1.784869
miscl_rf <- miscl[5,]
mean(miscl_rf)
## [1] 5.43
median(miscl_rf)
## [1] 5
sd(miscl_rf)
## [1] 1.730621
# sensitivity
# Mean, median and sd of each row of `sensitivity`
# (presumably held-out/test sensitivity per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# NOTE: the sensitivity_* names are also assigned from `sensitivity_train`
# earlier in the file; these assignments overwrite those values.
sensitivity_trees <- sensitivity[1,]
mean(sensitivity_trees)
## [1] 0.59
median(sensitivity_trees)
## [1] 0.625
sd(sensitivity_trees)
## [1] 0.1714098
sensitivity_svm <- sensitivity[2,]
mean(sensitivity_svm)
## [1] 0.47375
median(sensitivity_svm)
## [1] 0.5
sd(sensitivity_svm)
## [1] 0.2270355
sensitivity_svmR <- sensitivity[3,]
mean(sensitivity_svmR)
## [1] 0.26125
median(sensitivity_svmR)
## [1] 0.25
sd(sensitivity_svmR)
## [1] 0.2248
sensitivity_logs <- sensitivity[4,]
mean(sensitivity_logs)
## [1] 0.61125
median(sensitivity_logs)
## [1] 0.625
sd(sensitivity_logs)
## [1] 0.1784606
sensitivity_rf <- sensitivity[5,]
mean(sensitivity_rf)
## [1] 0.53
median(sensitivity_rf)
## [1] 0.5
sd(sensitivity_rf)
## [1] 0.1812903
# specificity
# Mean, median and sd of each row of `specificity`
# (presumably held-out/test specificity per resampling run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# NOTE: the specificity_* names are also assigned from `specificity_train`
# earlier in the file; these assignments overwrite those values.
specificity_trees <- specificity[1,]
mean(specificity_trees)
## [1] 0.6088889
median(specificity_trees)
## [1] 0.6666667
sd(specificity_trees)
## [1] 0.1761995
specificity_svm <- specificity[2,]
mean(specificity_svm)
## [1] 0.8755556
median(specificity_svm)
## [1] 0.8888889
sd(specificity_svm)
## [1] 0.1370234
specificity_svmR <- specificity[3,]
mean(specificity_svmR)
## [1] 0.79
median(specificity_svmR)
## [1] 0.8888889
sd(specificity_svmR)
## [1] 0.2210378
specificity_logs <- specificity[4,]
mean(specificity_logs)
## [1] 0.7133333
median(specificity_logs)
## [1] 0.7222222
sd(specificity_logs)
## [1] 0.1876469
specificity_rf <- specificity[5,]
mean(specificity_rf)
## [1] 0.8144444
median(specificity_rf)
## [1] 0.8888889
sd(specificity_rf)
## [1] 0.1579699
# fneg
# Mean, median and sd of each row of `fneg`
# (presumably the number of false negatives on held-out samples per run; confirm).
# Row order as named below: 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# NOTE: the fneg_* names are also assigned from `fneg_train` earlier in the
# file; these assignments overwrite those values.
fneg_trees <- fneg[1,]
mean(fneg_trees)
## [1] 3.28
median(fneg_trees)
## [1] 3
sd(fneg_trees)
## [1] 1.371278
fneg_svm <- fneg[2,]
mean(fneg_svm)
## [1] 4.21
median(fneg_svm)
## [1] 4
sd(fneg_svm)
## [1] 1.816284
fneg_svmR <- fneg[3,]
mean(fneg_svmR)
## [1] 5.91
median(fneg_svmR)
## [1] 6
sd(fneg_svmR)
## [1] 1.7984
fneg_logs <- fneg[4,]
mean(fneg_logs)
## [1] 3.11
median(fneg_logs)
## [1] 3
sd(fneg_logs)
## [1] 1.427684
fneg_rf <- fneg[5,]
mean(fneg_rf)
## [1] 3.76
median(fneg_rf)
## [1] 4
sd(fneg_rf)
## [1] 1.450322
# Rebuild the full expression table and the 0/1 label table, restricted to the
# samples present in both and sorted by sample ID.
xdata <- xdataT
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw[["row"]] <- DATASET1$ID
# ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the labels: "P" -> 0, "Pm" -> 1 (converted to numeric at the end).
ydata.raw[["clinic1$class"]][ydata.raw[["clinic1$class"]] == "P"] <- 0
ydata.raw[["clinic1$class"]][ydata.raw[["clinic1$class"]] == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Intersect the two tables on their row names ...
xdata <- xdata[is.element(rownames(xdata), rownames(ydata.raw)), ]
ydata.raw <- as.data.frame(
  ydata.raw[is.element(rownames(ydata.raw), rownames(xdata)), ]
)
# ... and put both in row-name order so rows line up.
xdata <- xdata[order(rownames(xdata)), ]
ydata.raw <- ydata.raw[order(rownames(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw)
ydata[["clinic1$class"]] <- as.numeric(ydata[["clinic1$class"]])
## [1] 0
## [1] 1
## [1] 1011
## [1] "data"
## [1] "en pred ups"
## [1] "EN"
## [1] 1
## [1] 2
## [1] 1022
## [1] "data"
## [1] "EN"
## [1] 2
## [1] 3
## [1] 1033
## [1] "data"
## [1] "EN"
## [1] 3
## [1] 4
## [1] 1044
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 3
## [1] 5
## [1] 1055
## [1] "data"
## [1] "EN"
## [1] 4
## [1] 6
## [1] 1066
## [1] "data"
## [1] "EN"
## [1] 5
## [1] 7
## [1] 1077
## [1] "data"
## [1] "EN"
## [1] 6
## [1] 8
## [1] 1088
## [1] "data"
## [1] "EN"
## [1] 7
## [1] 9
## [1] 1099
## [1] "data"
## [1] "EN"
## [1] 8
## [1] 10
## [1] 1110
## [1] "data"
## [1] "EN"
## [1] 9
## [1] 11
## [1] 1121
## [1] "data"
## [1] "EN"
## [1] 10
## [1] 12
## [1] 1132
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 10
## [1] 13
## [1] 1143
## [1] "data"
## [1] "EN"
## [1] 11
## [1] 14
## [1] 1154
## [1] "data"
## [1] "EN"
## [1] 12
## [1] 15
## [1] 1165
## [1] "data"
## [1] "EN"
## [1] 13
## [1] 16
## [1] 1176
## [1] "data"
## [1] "en pred ups"
## [1] "EN"
## [1] 14
## [1] 17
## [1] 1187
## [1] "data"
## [1] "EN"
## [1] 15
## [1] 18
## [1] 1198
## [1] "data"
## [1] "EN"
## [1] 16
## [1] 19
## [1] 1209
## [1] "data"
## [1] "EN"
## [1] 17
## [1] 20
## [1] 1220
## [1] "data"
## [1] "EN"
## [1] 18
## [1] 21
## [1] 1231
## [1] "data"
## [1] "en train ups"
## [1] "en pred ups"
## [1] "EN"
## [1] 18
## [1] 22
## [1] 1242
## [1] "data"
## [1] "EN"
## [1] 19
## [1] 23
## [1] 1253
## [1] "data"
## [1] "EN"
## [1] 20
## [1] 24
## [1] 1264
## [1] "data"
## [1] "EN"
## [1] 21
## [1] 25
## [1] 1275
## [1] "data"
## [1] "EN"
## [1] 22
## [1] 26
## [1] 1286
## [1] "data"
## [1] "EN"
## [1] 23
## [1] 27
## [1] 1297
## [1] "data"
## [1] "EN"
## [1] 24
## [1] 28
## [1] 1308
## [1] "data"
## [1] "EN"
## [1] 25
## [1] 29
## [1] 1319
## [1] "data"
## [1] "EN"
## [1] 26
## [1] 30
## [1] 1330
## [1] "data"
## [1] "EN"
## [1] 27
## [1] 31
## [1] 1341
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 27
## [1] 32
## [1] 1352
## [1] "data"
## [1] "EN"
## [1] 28
## [1] 33
## [1] 1363
## [1] "data"
## [1] "EN"
## [1] 29
## [1] 34
## [1] 1374
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 29
## [1] 35
## [1] 1385
## [1] "data"
## [1] "EN"
## [1] 30
## [1] 36
## [1] 1396
## [1] "data"
## [1] "EN"
## [1] 31
## [1] 37
## [1] 1407
## [1] "data"
## [1] "EN"
## [1] 32
## [1] 38
## [1] 1418
## [1] "data"
## [1] "EN"
## [1] 33
## [1] 39
## [1] 1429
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 33
## [1] 40
## [1] 1440
## [1] "data"
## [1] "en train ups"
## [1] "en pred ups"
## [1] "EN"
## [1] 33
## [1] 41
## [1] 1451
## [1] "data"
## [1] "EN"
## [1] 34
## [1] 42
## [1] 1462
## [1] "data"
## [1] "en pred ups"
## [1] "EN"
## [1] 35
## [1] 43
## [1] 1473
## [1] "data"
## [1] "EN"
## [1] 36
## [1] 44
## [1] 1484
## [1] "data"
## [1] "EN"
## [1] 37
## [1] 45
## [1] 1495
## [1] "data"
## [1] "EN"
## [1] 38
## [1] 46
## [1] 1506
## [1] "data"
## [1] "EN"
## [1] 39
## [1] 47
## [1] 1517
## [1] "data"
## [1] "en pred ups"
## [1] "EN"
## [1] 40
## [1] 48
## [1] 1528
## [1] "data"
## [1] "EN"
## [1] 41
## [1] 49
## [1] 1539
## [1] "data"
## [1] "EN"
## [1] 42
## [1] 50
## [1] 1550
## [1] "data"
## [1] "EN"
## [1] 43
## [1] 51
## [1] 1561
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 43
## [1] 52
## [1] 1572
## [1] "data"
## [1] "EN"
## [1] 44
## [1] 53
## [1] 1583
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 44
## [1] 54
## [1] 1594
## [1] "data"
## [1] "en train ups"
## [1] "en pred ups"
## [1] "EN"
## [1] 44
## [1] 55
## [1] 1605
## [1] "data"
## [1] "EN"
## [1] 45
## [1] 56
## [1] 1616
## [1] "data"
## [1] "EN"
## [1] 46
## [1] 57
## [1] 1627
## [1] "data"
## [1] "EN"
## [1] 47
## [1] 58
## [1] 1638
## [1] "data"
## [1] "en pred ups"
## [1] "EN"
## [1] 48
## [1] 59
## [1] 1649
## [1] "data"
## [1] "EN"
## [1] 49
## [1] 60
## [1] 1660
## [1] "data"
## [1] "EN"
## [1] 50
## [1] 61
## [1] 1671
## [1] "data"
## [1] "EN"
## [1] 51
## [1] 62
## [1] 1682
## [1] "data"
## [1] "EN"
## [1] 52
## [1] 63
## [1] 1693
## [1] "data"
## [1] "EN"
## [1] 53
## [1] 64
## [1] 1704
## [1] "data"
## [1] "EN"
## [1] 54
## [1] 65
## [1] 1715
## [1] "data"
## [1] "EN"
## [1] 55
## [1] 66
## [1] 1726
## [1] "data"
## [1] "EN"
## [1] 56
## [1] 67
## [1] 1737
## [1] "data"
## [1] "EN"
## [1] 57
## [1] 68
## [1] 1748
## [1] "data"
## [1] "EN"
## [1] 58
## [1] 69
## [1] 1759
## [1] "data"
## [1] "EN"
## [1] 59
## [1] 70
## [1] 1770
## [1] "data"
## [1] "EN"
## [1] 60
## [1] 71
## [1] 1781
## [1] "data"
## [1] "EN"
## [1] 61
## [1] 72
## [1] 1792
## [1] "data"
## [1] "EN"
## [1] 62
## [1] 73
## [1] 1803
## [1] "data"
## [1] "EN"
## [1] 63
## [1] 74
## [1] 1814
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 63
## [1] 75
## [1] 1825
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 63
## [1] 76
## [1] 1836
## [1] "data"
## [1] "EN"
## [1] 64
## [1] 77
## [1] 1847
## [1] "data"
## [1] "EN"
## [1] 65
## [1] 78
## [1] 1858
## [1] "data"
## [1] "EN"
## [1] 66
## [1] 79
## [1] 1869
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 66
## [1] 80
## [1] 1880
## [1] "data"
## [1] "EN"
## [1] 67
## [1] 81
## [1] 1891
## [1] "data"
## [1] "EN"
## [1] 68
## [1] 82
## [1] 1902
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 68
## [1] 83
## [1] 1913
## [1] "data"
## [1] "EN"
## [1] 69
## [1] 84
## [1] 1924
## [1] "data"
## [1] "EN"
## [1] 70
## [1] 85
## [1] 1935
## [1] "data"
## [1] "EN"
## [1] 71
## [1] 86
## [1] 1946
## [1] "data"
## [1] "EN"
## [1] 72
## [1] 87
## [1] 1957
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 72
## [1] 88
## [1] 1968
## [1] "data"
## [1] "EN"
## [1] 73
## [1] 89
## [1] 1979
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 73
## [1] 90
## [1] 1990
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 73
## [1] 91
## [1] 2001
## [1] "data"
## [1] "EN"
## [1] 74
## [1] 92
## [1] 2012
## [1] "data"
## [1] "EN"
## [1] 75
## [1] 93
## [1] 2023
## [1] "data"
## [1] "EN"
## [1] 76
## [1] 94
## [1] 2034
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 76
## [1] 95
## [1] 2045
## [1] "data"
## [1] "EN"
## [1] 77
## [1] 96
## [1] 2056
## [1] "data"
## [1] "EN"
## [1] 78
## [1] 97
## [1] 2067
## [1] "data"
## [1] "EN"
## [1] 79
## [1] 98
## [1] 2078
## [1] "data"
## [1] "EN"
## [1] 80
## [1] 99
## [1] 2089
## [1] "data"
## [1] "EN"
## [1] 81
## [1] 100
## [1] 2100
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 81
## [1] 101
## [1] 2111
## [1] "data"
## [1] "EN"
## [1] 82
## [1] 102
## [1] 2122
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 82
## [1] 103
## [1] 2133
## [1] "data"
## [1] "EN"
## [1] 83
## [1] 104
## [1] 2144
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 83
## [1] 105
## [1] 2155
## [1] "data"
## [1] "EN"
## [1] 84
## [1] 106
## [1] 2166
## [1] "data"
## [1] "EN"
## [1] 85
## [1] 107
## [1] 2177
## [1] "data"
## [1] "EN"
## [1] 86
## [1] 108
## [1] 2188
## [1] "data"
## [1] "EN"
## [1] 87
## [1] 109
## [1] 2199
## [1] "data"
## [1] "EN"
## [1] 88
## [1] 110
## [1] 2210
## [1] "data"
## [1] "EN"
## [1] 89
## [1] 111
## [1] 2221
## [1] "data"
## [1] "EN"
## [1] 90
## [1] 112
## [1] 2232
## [1] "data"
## [1] "EN"
## [1] 91
## [1] 113
## [1] 2243
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 91
## [1] 114
## [1] 2254
## [1] "data"
## [1] "EN"
## [1] 92
## [1] 115
## [1] 2265
## [1] "data"
## [1] "EN não selecionou variaveis"
## [1] 92
## [1] 116
## [1] 2276
## [1] "data"
## [1] "EN"
## [1] 93
## [1] 117
## [1] 2287
## [1] "data"
## [1] "en pred ups"
## [1] "EN"
## [1] 94
## [1] 118
## [1] 2298
## [1] "data"
## [1] "EN"
## [1] 95
## [1] 119
## [1] 2309
## [1] "data"
## [1] "EN"
## [1] 96
## [1] 120
## [1] 2320
## [1] "data"
## [1] "EN"
## [1] 97
## [1] 121
## [1] 2331
## [1] "data"
## [1] "EN"
## [1] 98
## [1] 122
## [1] 2342
## [1] "data"
## [1] "EN"
## [1] 99
## [1] 123
## [1] 2353
## [1] "data"
## [1] "EN"
## [1] 1
## [1] 0
## [1] 2364
## [1] "data"
## [1] "iTwiner"
## [1] 2
## [1] 1
## [1] 2375
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 3
## [1] 1
## [1] 2386
## [1] "data"
## [1] "iTwiner"
## [1] 4
## [1] 2
## [1] 2397
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 5
## [1] 3
## [1] 2408
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 6
## [1] 4
## [1] 2419
## [1] "data"
## [1] "iTwiner"
## [1] 7
## [1] 5
## [1] 2430
## [1] "data"
## [1] "iTwiner"
## [1] 8
## [1] 6
## [1] 2441
## [1] "data"
## [1] "iTwiner"
## [1] 9
## [1] 7
## [1] 2452
## [1] "data"
## [1] "iTwiner"
## [1] 10
## [1] 8
## [1] 2463
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 11
## [1] 9
## [1] 2474
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 12
## [1] 9
## [1] 2485
## [1] "data"
## [1] "iTwiner"
## [1] 13
## [1] 10
## [1] 2496
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 14
## [1] 10
## [1] 2507
## [1] "data"
## [1] "iTwiner"
## [1] 15
## [1] 11
## [1] 2518
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 16
## [1] 11
## [1] 2529
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 17
## [1] 11
## [1] 2540
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 18
## [1] 12
## [1] 2551
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 19
## [1] 12
## [1] 2562
## [1] "data"
## [1] "iTwiner"
## [1] 20
## [1] 13
## [1] 2573
## [1] "data"
## [1] "iTwiner"
## [1] 21
## [1] 14
## [1] 2584
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 22
## [1] 15
## [1] 2595
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 23
## [1] 15
## [1] 2606
## [1] "data"
## [1] "iTwiner"
## [1] 24
## [1] 16
## [1] 2617
## [1] "data"
## [1] "iTwiner"
## [1] 25
## [1] 17
## [1] 2628
## [1] "data"
## [1] "iTwiner"
## [1] 26
## [1] 18
## [1] 2639
## [1] "data"
## [1] "iTwiner"
## [1] 27
## [1] 19
## [1] 2650
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 28
## [1] 19
## [1] 2661
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 29
## [1] 20
## [1] 2672
## [1] "data"
## [1] "iTwiner"
## [1] 30
## [1] 21
## [1] 2683
## [1] "data"
## [1] "iTwiner"
## [1] 31
## [1] 22
## [1] 2694
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 32
## [1] 22
## [1] 2705
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 33
## [1] 22
## [1] 2716
## [1] "data"
## [1] "iTwiner"
## [1] 34
## [1] 23
## [1] 2727
## [1] "data"
## [1] "iTwiner"
## [1] 35
## [1] 24
## [1] 2738
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 36
## [1] 24
## [1] 2749
## [1] "data"
## [1] "iTwiner"
## [1] 37
## [1] 25
## [1] 2760
## [1] "data"
## [1] "iTwiner"
## [1] 38
## [1] 26
## [1] 2771
## [1] "data"
## [1] "iTwiner"
## [1] 39
## [1] 27
## [1] 2782
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 40
## [1] 27
## [1] 2793
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 41
## [1] 27
## [1] 2804
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 42
## [1] 28
## [1] 2815
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 43
## [1] 28
## [1] 2826
## [1] "data"
## [1] "iTwiner"
## [1] 44
## [1] 29
## [1] 2837
## [1] "data"
## [1] "iTwiner"
## [1] 45
## [1] 30
## [1] 2848
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 46
## [1] 30
## [1] 2859
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 47
## [1] 30
## [1] 2870
## [1] "data"
## [1] "iTwiner"
## [1] 48
## [1] 31
## [1] 2881
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 49
## [1] 31
## [1] 2892
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 50
## [1] 31
## [1] 2903
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 51
## [1] 32
## [1] 2914
## [1] "data"
## [1] "iTwiner"
## [1] 52
## [1] 33
## [1] 2925
## [1] "data"
## [1] "iTwiner"
## [1] 53
## [1] 34
## [1] 2936
## [1] "data"
## [1] "iTwiner"
## [1] 54
## [1] 35
## [1] 2947
## [1] "data"
## [1] "iTwiner"
## [1] 55
## [1] 36
## [1] 2958
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 56
## [1] 36
## [1] 2969
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 57
## [1] 37
## [1] 2980
## [1] "data"
## [1] "iTwiner"
## [1] 58
## [1] 38
## [1] 2991
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner"
## [1] 59
## [1] 38
## [1] 3002
## [1] "data"
## [1] "iTwiner"
## [1] 60
## [1] 39
## [1] 3013
## [1] "data"
## [1] "iTwiner"
## [1] 61
## [1] 40
## [1] 3024
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 62
## [1] 41
## [1] 3035
## [1] "data"
## [1] "iTwiner"
## [1] 63
## [1] 42
## [1] 3046
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 64
## [1] 42
## [1] 3057
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 65
## [1] 43
## [1] 3068
## [1] "data"
## [1] "iTwiner"
## [1] 66
## [1] 44
## [1] 3079
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 67
## [1] 44
## [1] 3090
## [1] "data"
## [1] "iTwiner"
## [1] 68
## [1] 45
## [1] 3101
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 69
## [1] 45
## [1] 3112
## [1] "data"
## [1] "iTwiner"
## [1] 70
## [1] 46
## [1] 3123
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 71
## [1] 47
## [1] 3134
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 72
## [1] 47
## [1] 3145
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 73
## [1] 48
## [1] 3156
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 74
## [1] 49
## [1] 3167
## [1] "data"
## [1] "iTwiner"
## [1] 75
## [1] 50
## [1] 3178
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 76
## [1] 50
## [1] 3189
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 77
## [1] 50
## [1] 3200
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 78
## [1] 50
## [1] 3211
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 79
## [1] 51
## [1] 3222
## [1] "data"
## [1] "iTwiner"
## [1] 80
## [1] 52
## [1] 3233
## [1] "data"
## [1] "iTwiner"
## [1] 81
## [1] 53
## [1] 3244
## [1] "data"
## [1] "iTwiner"
## [1] 82
## [1] 54
## [1] 3255
## [1] "data"
## [1] "iTwiner"
## [1] 83
## [1] 55
## [1] 3266
## [1] "data"
## [1] "iTwiner"
## [1] 84
## [1] 56
## [1] 3277
## [1] "data"
## [1] "iTwiner"
## [1] 85
## [1] 57
## [1] 3288
## [1] "data"
## [1] "iTwiner"
## [1] 86
## [1] 58
## [1] 3299
## [1] "data"
## [1] "iTwiner"
## [1] 87
## [1] 59
## [1] 3310
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 88
## [1] 59
## [1] 3321
## [1] "data"
## [1] "iTwiner"
## [1] 89
## [1] 60
## [1] 3332
## [1] "data"
## [1] "iTwiner"
## [1] 90
## [1] 61
## [1] 3343
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 91
## [1] 62
## [1] 3354
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 92
## [1] 63
## [1] 3365
## [1] "data"
## [1] "iTwiner"
## [1] 93
## [1] 64
## [1] 3376
## [1] "data"
## [1] "iTwiner"
## [1] 94
## [1] 65
## [1] 3387
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 95
## [1] 66
## [1] 3398
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 96
## [1] 67
## [1] 3409
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 97
## [1] 67
## [1] 3420
## [1] "data"
## [1] "iTwiner"
## [1] 98
## [1] 68
## [1] 3431
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 99
## [1] 69
## [1] 3442
## [1] "data"
## [1] "iTwiner"
## [1] 100
## [1] 70
## [1] 3453
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 101
## [1] 70
## [1] 3464
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 102
## [1] 70
## [1] 3475
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 103
## [1] 71
## [1] 3486
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 104
## [1] 72
## [1] 3497
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 105
## [1] 73
## [1] 3508
## [1] "data"
## [1] "iTwiner"
## [1] 106
## [1] 74
## [1] 3519
## [1] "data"
## [1] "iTwiner"
## [1] 107
## [1] 75
## [1] 3530
## [1] "data"
## [1] "iTwiner"
## [1] 108
## [1] 76
## [1] 3541
## [1] "data"
## [1] "iTwiner"
## [1] 109
## [1] 77
## [1] 3552
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 110
## [1] 78
## [1] 3563
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 111
## [1] 79
## [1] 3574
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 112
## [1] 80
## [1] 3585
## [1] "data"
## [1] "iTwiner"
## [1] 113
## [1] 81
## [1] 3596
## [1] "data"
## [1] "iTwiner"
## [1] 114
## [1] 82
## [1] 3607
## [1] "data"
## [1] "iTwiner"
## [1] 115
## [1] 83
## [1] 3618
## [1] "data"
## [1] "iTwiner"
## [1] 116
## [1] 84
## [1] 3629
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 117
## [1] 85
## [1] 3640
## [1] "data"
## [1] "iTwiner"
## [1] 118
## [1] 86
## [1] 3651
## [1] "data"
## [1] "iTwiner"
## [1] 119
## [1] 87
## [1] 3662
## [1] "data"
## [1] "iTwiner"
## [1] 120
## [1] 88
## [1] 3673
## [1] "data"
## [1] "iTwiner"
## [1] 121
## [1] 89
## [1] 3684
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 122
## [1] 89
## [1] 3695
## [1] "data"
## [1] "iTwiner train ups"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 123
## [1] 89
## [1] 3706
## [1] "data"
## [1] "iTwiner"
## [1] 124
## [1] 90
## [1] 3717
## [1] "data"
## [1] "iTwiner"
## [1] 125
## [1] 91
## [1] 3728
## [1] "data"
## [1] "iTwiner"
## [1] 126
## [1] 92
## [1] 3739
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 127
## [1] 93
## [1] 3750
## [1] "data"
## [1] "iTwiner"
## [1] 128
## [1] 94
## [1] 3761
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 129
## [1] 95
## [1] 3772
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 130
## [1] 96
## [1] 3783
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 131
## [1] 97
## [1] 3794
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 132
## [1] 98
## [1] 3805
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
## [1] 133
## [1] 99
## [1] 3816
## [1] "data"
## [1] "iTwiner test ups"
## [1] "iTwiner"
# Column indices of the runs whose stored value is non-zero, per method
# (row 1 = EN, row 2 = iTwiner) and per split (`_tr` = train, `_tes` = test).
# NOTE(review): a zero entry is presumably the placeholder for a run that
# produced no model -- confirm against the loop that fills these matrices.
runs_en_tr <- which(acc_cox_tr[1, ] != 0)
runs_itw_tr <- which(acc_cox_tr[2, ] != 0)
runs_en_ts <- which(acc_cox_tes[1, ] != 0)
runs_itw_ts <- which(acc_cox_tes[2, ] != 0)

# Number of variables selected by EN, restricted to its retained train runs.
nvar_en <- nvar_selected_1[1, runs_en_tr]
mean(nvar_en)
## [1] 41.43
median(nvar_en)
## [1] 39
sd(nvar_en)
## [1] 19.76031

# Number of variables selected by iTwiner, restricted to its retained train runs.
nvar_tw <- nvar_selected_1[2, runs_itw_tr]
mean(nvar_tw)
## [1] 44.56
median(nvar_tw)
## [1] 38.5
sd(nvar_tw)
## [1] 20.6546
# Summary of `acc_cox_*` (presumably accuracy) per method and split.
# NOTE(review): train summaries use the `runs_*_tr` indices while test
# summaries use `runs_*_ts`, so the two are computed over different run sets.

# EN, train split
acc_cox_tr_EN <- acc_cox_tr[1, runs_en_tr]
# hist(acc_cox_tr_EN)
mean(acc_cox_tr_EN)
## [1] 0.8010526
median(acc_cox_tr_EN)
## [1] 0.7894737
sd(acc_cox_tr_EN)
## [1] 0.0722694

# EN, test split
acc_cox_tes_EN <- acc_cox_tes[1, runs_en_ts]
# hist(acc_cox_tes_EN)
mean(acc_cox_tes_EN)
## [1] 0.5744681
median(acc_cox_tes_EN)
## [1] 0.5882353
sd(acc_cox_tes_EN)
## [1] 0.07391765

# iTwiner, train split
acc_cox_tr_iTwiner <- acc_cox_tr[2, runs_itw_tr]
# hist(acc_cox_tr_iTwiner)
mean(acc_cox_tr_iTwiner)
## [1] 0.705
median(acc_cox_tr_iTwiner)
## [1] 0.7105263
sd(acc_cox_tr_iTwiner)
## [1] 0.06588722

# iTwiner, test split
acc_cox_tes_iTwiner <- acc_cox_tes[2, runs_itw_ts]
# hist(acc_cox_tes_iTwiner)
mean(acc_cox_tes_iTwiner)
## [1] 0.6263463
median(acc_cox_tes_iTwiner)
## [1] 0.6470588
sd(acc_cox_tes_iTwiner)
## [1] 0.04981289
# Summary of `miscl_cox_*` (presumably misclassification counts) per method
# and split, restricted to the retained runs of that method and split.

# EN, train split
miscl_cox_tr_EN <- miscl_cox_tr[1, runs_en_tr]
mean(miscl_cox_tr_EN)
## [1] 7.56
median(miscl_cox_tr_EN)
## [1] 8
sd(miscl_cox_tr_EN)
## [1] 2.746237

# EN, test split
miscl_cox_tes_EN <- miscl_cox_tes[1, runs_en_ts]
mean(miscl_cox_tes_EN)
## [1] 7.234043
median(miscl_cox_tes_EN)
## [1] 7
sd(miscl_cox_tes_EN)
## [1] 1.2566

# iTwiner, train split
miscl_cox_tr_iTwiner <- miscl_cox_tr[2, runs_itw_tr]
mean(miscl_cox_tr_iTwiner)
## [1] 11.21
median(miscl_cox_tr_iTwiner)
## [1] 11
sd(miscl_cox_tr_iTwiner)
## [1] 2.503714

# iTwiner, test split
miscl_cox_tes_iTwiner <- miscl_cox_tes[2, runs_itw_ts]
mean(miscl_cox_tes_iTwiner)
## [1] 6.352113
median(miscl_cox_tes_iTwiner)
## [1] 6
sd(miscl_cox_tes_iTwiner)
## [1] 0.8468191
# Summary of `fneg_reg_*` (presumably false-negative counts) per method and
# split; row 1 is EN and row 2 is iTwiner, columns limited to retained runs.

# EN -- train
fneg_reg_en_train <- fneg_reg_tr[1, runs_en_tr]
mean(fneg_reg_en_train)
## [1] 7.56
median(fneg_reg_en_train)
## [1] 8
sd(fneg_reg_en_train)
## [1] 2.746237

# EN -- test
fneg_reg_en_test <- fneg_reg_ts[1, runs_en_ts]
mean(fneg_reg_en_test)
## [1] 5.787234
median(fneg_reg_en_test)
## [1] 6
sd(fneg_reg_en_test)
## [1] 1.134715

# iTwiner -- train
fneg_reg_iTwiner_train <- fneg_reg_tr[2, runs_itw_tr]
mean(fneg_reg_iTwiner_train)
## [1] 11.21
median(fneg_reg_iTwiner_train)
## [1] 11
sd(fneg_reg_iTwiner_train)
## [1] 2.503714

# iTwiner -- test
fneg_reg_iTwiner_test <- fneg_reg_ts[2, runs_itw_ts]
mean(fneg_reg_iTwiner_test)
## [1] 6.239437
median(fneg_reg_iTwiner_test)
## [1] 6
sd(fneg_reg_iTwiner_test)
## [1] 0.7831218
# Sensitivity summaries per method and split, restricted to the retained runs
# of that method and split (row 1 = EN, row 2 = iTwiner).

# EN, train split
sensitivity_cox_tr_EN <- sensitivity_cox_tr[1, runs_en_tr]
mean(sensitivity_cox_tr_EN)
## [1] 0.5552941
median(sensitivity_cox_tr_EN)
## [1] 0.5294118
sd(sensitivity_cox_tr_EN)
## [1] 0.1615434

# EN, test split
sensitivity_cox_tes_EN <- sensitivity_cox_tes[1, runs_en_ts]
mean(sensitivity_cox_tes_EN)
## [1] 0.2765957
median(sensitivity_cox_tes_EN)
## [1] 0.25
sd(sensitivity_cox_tes_EN)
## [1] 0.1418393

# iTwiner, train split
sensitivity_cox_tr_iTwiner <- sensitivity_cox_tr[2, runs_itw_tr]
mean(sensitivity_cox_tr_iTwiner)
## [1] 0.3405882
median(sensitivity_cox_tr_iTwiner)
## [1] 0.3529412
sd(sensitivity_cox_tr_iTwiner)
## [1] 0.1472773

# iTwiner, test split
sensitivity_cox_tes_iTwiner <- sensitivity_cox_tes[2, runs_itw_ts]
mean(sensitivity_cox_tes_iTwiner)
## [1] 0.2200704
median(sensitivity_cox_tes_iTwiner)
## [1] 0.25
sd(sensitivity_cox_tes_iTwiner)
## [1] 0.09789022
# Specificity summaries per method and split, restricted to the retained runs
# of that method and split (row 1 = EN, row 2 = iTwiner).

# EN, train split
specificity_cox_tr_EN <- specificity_cox_tr[1, runs_en_tr]
mean(specificity_cox_tr_EN)
## [1] 1
median(specificity_cox_tr_EN)
## [1] 1
sd(specificity_cox_tr_EN)
## [1] 0

# EN, test split
specificity_cox_tes_EN <- specificity_cox_tes[1, runs_en_ts]
mean(specificity_cox_tes_EN)
## [1] 0.8392435
median(specificity_cox_tes_EN)
## [1] 0.8888889
sd(specificity_cox_tes_EN)
## [1] 0.1181936

# iTwiner, train split
specificity_cox_tr_iTwiner <- specificity_cox_tr[2, runs_itw_tr]
mean(specificity_cox_tr_iTwiner)
## [1] 1
median(specificity_cox_tr_iTwiner)
## [1] 1
sd(specificity_cox_tr_iTwiner)
## [1] 0

# iTwiner, test split
specificity_cox_tes_iTwiner <- specificity_cox_tes[2, runs_itw_ts]
mean(specificity_cox_tes_iTwiner)
## [1] 0.9874804
median(specificity_cox_tes_iTwiner)
## [1] 1
sd(specificity_cox_tes_iTwiner)
## [1] 0.04005858
# AUC summaries per method and split, restricted to the retained runs of that
# method and split (row 1 = EN, row 2 = iTwiner).

# EN, train split
auc_cox_tr_EN <- auc_cox_tr[1, runs_en_tr]
mean(auc_cox_tr_EN)
## [1] 0.7776471
median(auc_cox_tr_EN)
## [1] 0.7647059
sd(auc_cox_tr_EN)
## [1] 0.08077168

# EN, test split
auc_cox_tes_EN <- auc_cox_tes[1, runs_en_ts]
mean(auc_cox_tes_EN)
## [1] 0.5611702
median(auc_cox_tes_EN)
## [1] 0.5694444
sd(auc_cox_tes_EN)
## [1] 0.07220669

# iTwiner, train split
auc_cox_tr_iTwiner <- auc_cox_tr[2, runs_itw_tr]
mean(auc_cox_tr_iTwiner)
## [1] 0.6702941
median(auc_cox_tr_iTwiner)
## [1] 0.6764706
sd(auc_cox_tr_iTwiner)
## [1] 0.07363866

# iTwiner, test split
auc_cox_tes_iTwiner <- auc_cox_tes[2, runs_itw_ts]
mean(auc_cox_tes_iTwiner)
## [1] 0.6037754
median(auc_cox_tes_iTwiner)
## [1] 0.625
sd(auc_cox_tes_iTwiner)
## [1] 0.05201897
Variables always selected
# Per-run selected-variable sets for EN, limited to the runs indexed by
# `runs_en_ts` (the indices derived from the test matrix).
var_selected_alw_select_en <- var_selected_en1[runs_en_ts]
# Variables present in every one of those runs: fold `intersect` over the list.
var_selected_alw_select_en1 <- Reduce(
  intersect,
  var_selected_alw_select_en
)
print(paste("variables always selected by EN = ",length(var_selected_alw_select_en1)))
## [1] "variables always selected by EN = 0"

# Same computation for iTwiner, using the runs indexed by `runs_itw_ts`.
var_selected_alw_select_iTwiner <- var_selected_iTwiner1[runs_itw_ts]
var_selected_alw_select_iTwiner1 <- Reduce(
  intersect,
  var_selected_alw_select_iTwiner
)
print(paste("variables always selected by iTwiner = ",length(var_selected_alw_select_iTwiner1)))
## [1] "variables always selected by iTwiner = 2"
Variables selected in more than 50% of the bootstrap samples
# EN: count in how many of the retained runs each variable was selected and
# keep the variables chosen in strictly more than half of those runs.
l <- length(var_selected_alw_select_en)
var_selected_50_select_en <- as.data.frame(
  table(unlist(var_selected_alw_select_en))
)
var_selected_50_select_en <- var_selected_50_select_en[
  var_selected_50_select_en$Freq > 0.50 * l,
]
print(paste("variables selected 50% by EN = ",length(var_selected_50_select_en$Var1)))
## [1] "variables selected 50% by EN = 6"
var_selected_50_select_en$Var1
## [1] IGBP1P2 LRRC37A14P MIR5002 MIR6829 MIR8078 NMU
## 1014 Levels: ABCF2P2 ABHD16B ABHD17AP6 ACRV1 ACSL6-AS1 ACTBP9 ... ZNHIT1P1
#
# iTwiner: same frequency table and >50% filter; `l` is reused for the number
# of retained iTwiner runs.
l <- length(var_selected_alw_select_iTwiner)
var_selected_50_select_iTwiner <- as.data.frame(
  table(unlist(var_selected_alw_select_iTwiner))
)
var_selected_50_select_iTwiner <- var_selected_50_select_iTwiner[
  var_selected_50_select_iTwiner$Freq > 0.50 * l,
]
print(paste("variables selected 50% by iTwiner = ",length(var_selected_50_select_iTwiner$Var1)))
## [1] "variables selected 50% by iTwiner = 30"
var_selected_50_select_iTwiner$Var1
## [1] APOOP4 DNTT EEF1B2P6 H2BC2P HORMAD2 HSPE1P4
## [7] IGHVIII-2-1 IQCF5-AS1 KCTD9P3 KDM4F LINC01100 LINC02059
## [13] MIR602 MIR659 MIR8078 NDUFA5P10 NME2P2 NMNAT1P3
## [19] OPCML-IT2 OR1S2 RAC1P3 RNU6-973P RPL21P104 RPL23AP26
## [25] RPL7P12 TDGF1P7 TLR12P TRAJ49 TRBV11-1 VN2R9P
## 308 Levels: ABCD1P3 ALOX15P2 ANKRD20A10P AOX3P-AOX2P APOOP4 ... ZDHHC20P2
# Rank every variable selected by EN in the retained runs by how often it was
# selected, then export the 100 most frequent and keep the 50 most frequent.
# NOTE: this reassigns `var_selected_50_select_en`; from here on it holds the
# full frequency table sorted by decreasing `Freq`, not only the >50% subset.
var_selected_50_select_en <- as.data.frame(
  table(unlist(var_selected_alw_select_en))
)
var_selected_50_select_en <- var_selected_50_select_en[
  order(var_selected_50_select_en$Freq, decreasing = TRUE),
]
hist(var_selected_50_select_en$Freq)
# head() rather than [1:100, ] / [1:50]: with fewer candidates than requested
# the result is simply shorter instead of being padded with NA entries.
top100_en <- head(var_selected_50_select_en, 100)
top100_en <- top100_en$Var1
top50_en <- head(top100_en, 50)
top100_en <- as.data.frame(top100_en)
write_xlsx(top100_en, "List_top100_en_d3.xlsx")

# Same ranking and export for iTwiner (also reassigns
# `var_selected_50_select_iTwiner` to the full sorted frequency table).
var_selected_50_select_iTwiner <- as.data.frame(
  table(unlist(var_selected_alw_select_iTwiner))
)
var_selected_50_select_iTwiner <- var_selected_50_select_iTwiner[
  order(var_selected_50_select_iTwiner$Freq, decreasing = TRUE),
]
hist(var_selected_50_select_iTwiner$Freq)
top100_itw <- head(var_selected_50_select_iTwiner, 100)
top100_itw <- top100_itw$Var1
top50_itw <- head(top100_itw, 50)
top100_itw <- as.data.frame(top100_itw)
write_xlsx(top100_itw, "List_top100_itw_d3.xlsx")
Variables in common between EN and iTwiner
# Variables that occur in both frequency tables, in iTwiner-table order.
# NOTE(review): both `var_selected_50_select_*` objects were reassigned to the
# full sorted frequency tables when the top-100 lists were built, so despite
# its name this intersection is not limited to variables selected in more than
# 50% of runs (95 hits versus 6 and 30 in the 50% sets) -- confirm intent.
common_var_selected_50_en_iTwiner <- var_selected_50_select_iTwiner$Var1[
  var_selected_50_select_iTwiner$Var1 %in% var_selected_50_select_en$Var1
]
length(common_var_selected_50_en_iTwiner)
## [1] 95
common_var_selected_50_en_iTwiner
## [1] NME2P2 RAC1P3 HORMAD2 OPCML-IT2 LINC01100
## [6] NMNAT1P3 MIR602 KDM4F TRBV11-1 NDUFA5P10
## [11] APOOP4 IQCF5-AS1 MIR8078 MIR659 H2BC2P
## [16] RPL7P12 LINC02059 EEF1B2P6 RNU6-973P TDGF1P7
## [21] RPL23AP26 HSPE1P4 PPP1R14BP4 XRCC6P2 DYTN
## [26] MIR8052 RNU6-179P IGHV3-16 LINC02178 COX5AP1
## [31] DMD-AS3 MIR6729 RNU7-170P MIR323B SNORD42A
## [36] RNU6-1105P RNU6-73P RNU6-902P SEPTIN2P1 LINC01927
## [41] MIR376B MIR609 RNU6-38P RNU7-193P MIR5702
## [46] VWC2L-IT1 HLA-S RPS6P15 BNIP3P36 MTATP8P1
## [51] RNU6-851P MIR4783 RBMY2XP RNA5SP246 EPPIN-WFDC6
## [56] MIR4667 RPS27AP20 SNORD31B MIR7156 RNU6-954P
## [61] TRAV11 DNAJA1P6 DUTP5 IGHJ1 MIR1243
## [66] MIR4493 MIR6782 RNA5SP205 RNU4-75P RNU6-154P
## [71] RNU6-521P RPS4XP11 SELENOTP1 SULT6B2P TDGF1P1
## [76] ANKRD20A10P GAPDHP36 IGHV3OR16-13 LINC02726 MIR4317
## [81] MIR510 MIR587 MIR6083 RNA5SP74 RNU4-13P
## [86] RNU4-88P RNU6-1031P RNU6-1050P RNU6-115P RNU6-1293P
## [91] RNU6-156P RNU6-603P RNU7-154P RPL21P108 RPL5P21
## 308 Levels: ABCD1P3 ALOX15P2 ANKRD20A10P AOX3P-AOX2P APOOP4 ... ZDHHC20P2
# Build the count matrix and grouping for the edgeR analysis of the genes in
# the EN and iTwiner top-50 lists.
xdata <- rnaseq1[order(row.names(rnaseq1)), ]
# Union of the two lists. unique() is required: a gene present in both lists
# would otherwise be extracted twice (data-frame extraction renames the copy
# with a ".1" suffix, e.g. "MIR8078.1") and then be tested twice downstream.
# NOTE: results recorded before this fix include such duplicated rows and will
# change when the analysis is re-run.
nomesgenes <- unique(c(as.vector(top50_en), as.vector(top50_itw)))
xdata <- xdata[, nomesgenes]

# Clinical table indexed and sorted by sample ID.
rownames(DATASET1_bal) <- DATASET1_bal$ID
ydata <- as.data.frame(DATASET1_bal[order(row.names(DATASET1_bal)), ])
rownames(ydata) <- ydata$ID

# Restrict both tables to the samples they share.
xdata <- xdata[rownames(xdata) %in% rownames(ydata), ]
ydata <- as.data.frame(ydata[rownames(ydata) %in% rownames(xdata), ])

# keep features with standard deviation > 0
# (vapply/seq_len are type- and empty-safe; drop = FALSE keeps a table even if
# a single column survives)
col_sd <- vapply(
  seq_len(ncol(xdata)),
  function(ix) sd(xdata[, ix]),
  numeric(1)
)
xdata <- xdata[, col_sd != 0, drop = FALSE]

# edgeR expects genes in rows and samples in columns.
xdata <- t(xdata)
group <- as.factor(ydata$class)
class <- as.data.frame(ydata$class)
edgeR.DGElist <- DGEList(counts = xdata, group = group)
# Filtering: retain only genes with at least one count per million in five or
# more samples.
keep <- rowSums(cpm(edgeR.DGElist) >= 1) >= 5
edgeR.DGElist <- edgeR.DGElist[keep, ]

# Design matrix built from the grouping factor.
design <- model.matrix(~ group)

# Dispersion estimation over all read counts and samples.
edgeR.DGElist <- estimateDisp(edgeR.DGElist, design)

# Negative binomial GLM fit, one model per gene.
edger_fit <- glmFit(edgeR.DGElist, design)

# Likelihood-ratio test per gene; no coefficient is given explicitly and the
# recorded output shows that `groupPm` was the coefficient tested.
edger_lrt <- glmLRT(edger_fit)
summary(decideTests(edger_lrt))
## groupPm
## Down 18
## NotSig 67
## Up 15

# Extract the full results table from `edger_lrt`, sorted by p-value, with
# Benjamini-Hochberg adjusted p-values.
DGE.results_edgeR <- topTags(
  edger_lrt,
  n = Inf,
  sort.by = "PValue",
  adjust.method = "BH"
)
topTags(DGE.results_edgeR) # table with the top 10 DEGs
## Coefficient: groupPm
## logFC logCPM LR PValue FDR
## MIR5002 3.743050 11.669347 28.15427 1.120205e-07 1.120205e-05
## MIR4725 2.745083 11.881621 25.33740 4.812863e-07 2.406432e-05
## RNU6-33P 3.157497 13.900931 20.12542 7.252630e-06 2.417543e-04
## OR4K12P 2.690314 9.653652 18.21816 1.969917e-05 4.613801e-04
## MIR8078 2.512305 9.874790 17.57065 2.768280e-05 4.613801e-04
## MIR8078.1 2.512305 9.874790 17.57065 2.768280e-05 4.613801e-04
## MIR5579 2.488928 10.277140 14.68284 1.271988e-04 1.817126e-03
## RNU6-1263P 2.271580 9.975633 13.53384 2.342998e-04 2.581740e-03
## MIR8052 2.048224 9.583558 13.39440 2.523762e-04 2.581740e-03
## HSPE1P4 2.134876 9.581129 13.35181 2.581740e-04 2.581740e-03
# Differentially expressed genes: rows of the edgeR results with FDR < 0.05.
genes_deg <- DGE.results_edgeR$table
genes_deg <- subset(genes_deg, FDR < 0.05)
dim(genes_deg) # genes found to be differentially expressed
## [1] 33 5

# Store the gene identifiers (row names) in a column before exporting.
genes_deg$row <- rownames(genes_deg)
write_xlsx(genes_deg, "genes_deg_enitw_d3.xlsx")

# Genes with a positive log fold change.
high <- genes_deg[genes_deg$logFC > 0, ]
dim(high)
## [1] 15 6

# Genes with a negative log fold change.
low <- genes_deg[genes_deg$logFC < 0, ]
dim(low)
## [1] 18 6
# Rebuild the predictor matrix and the response, then assemble the modelling
# table restricted to the EN top-50 genes.
xdata <- xdataT

# The response is built from the expression `clinic1$class`, so its column is
# literally named "clinic1$class"; sample IDs are stored in column `row`.
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
# (disabled legacy labelling) ydata.raw$class <- c(rep(0,28),rep(1,34))

# Recode the class labels: "P" becomes 0 and "Pm" becomes 1.
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID

# Keep the samples shared by both tables and sort both by row name so that
# their rows are expected to line up.
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
colnames(ydata) <- c("class", "id")

# NOTE: `names` masks base::names() as a variable; the name is kept because
# code outside this block may read it.
names <- as.vector(top50_en)
# drop = FALSE keeps the gene name even when a single gene is selected.
xdata_en <- as.data.frame(xdata[, names, drop = FALSE])
# Remember the real gene names before replacing them with generic labels.
nomesgenes <- colnames(xdata_en)
# Number the generic labels from the actual column count instead of a
# hard-coded 1:50, which fails whenever the count is not exactly 50.
colnames(xdata_en) <- paste0("Var", seq_len(ncol(xdata_en)))
xdata_en$type <- as.factor(ydata$class)
ydata$class <- as.numeric(ydata$class)
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# EN gene set, training-set performance.
# Row k of every *_enplus_train object is read as one classifier:
# 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# hist(<vector>) can be run on any of these vectors to inspect it.
# acc
acc_trees <- acc_enplus_train[1, ]
acc_svm <- acc_enplus_train[2, ]
acc_svmR <- acc_enplus_train[3, ]
acc_logs <- acc_enplus_train[4, ]
acc_rf <- acc_enplus_train[5, ]
# auc
auc_trees <- auc_enplus_train[1, ]
auc_svm <- auc_enplus_train[2, ]
auc_svmR <- auc_enplus_train[3, ]
auc_logs <- auc_enplus_train[4, ]
auc_rf <- auc_enplus_train[5, ]
# miscl
miscl_trees <- miscl_enplus_train[1, ]
miscl_svm <- miscl_enplus_train[2, ]
miscl_svmR <- miscl_enplus_train[3, ]
miscl_logs <- miscl_enplus_train[4, ]
miscl_rf <- miscl_enplus_train[5, ]
# sensitivity
sensitivity_trees <- sensitivity_enplus_train[1, ]
sensitivity_svm <- sensitivity_enplus_train[2, ]
sensitivity_svmR <- sensitivity_enplus_train[3, ]
sensitivity_logs <- sensitivity_enplus_train[4, ]
sensitivity_rf <- sensitivity_enplus_train[5, ]
# specificity
specificity_trees <- specificity_enplus_train[1, ]
specificity_svm <- specificity_enplus_train[2, ]
specificity_svmR <- specificity_enplus_train[3, ]
specificity_logs <- specificity_enplus_train[4, ]
specificity_rf <- specificity_enplus_train[5, ]
# fneg
fneg_trees <- fneg_enplus_train[1, ]
fneg_svm <- fneg_enplus_train[2, ]
fneg_svmR <- fneg_enplus_train[3, ]
fneg_logs <- fneg_enplus_train[4, ]
fneg_rf <- fneg_enplus_train[5, ]
# Print mean, median and sd of every vector, metric by metric and
# classifier by classifier (same order as the assignments above)
invisible(lapply(
  list(
    acc_trees, acc_svm, acc_svmR, acc_logs, acc_rf,
    auc_trees, auc_svm, auc_svmR, auc_logs, auc_rf,
    miscl_trees, miscl_svm, miscl_svmR, miscl_logs, miscl_rf,
    sensitivity_trees, sensitivity_svm, sensitivity_svmR,
    sensitivity_logs, sensitivity_rf,
    specificity_trees, specificity_svm, specificity_svmR,
    specificity_logs, specificity_rf,
    fneg_trees, fneg_svm, fneg_svmR, fneg_logs, fneg_rf
  ),
  function(run_values) {
    print(mean(run_values))
    print(median(run_values))
    print(sd(run_values))
  }
))
## Recorded output of the original run (mean / median / sd):
## acc         trees  0.9934211 / 1 / 0.01261507
## acc         svm    0.9968421 / 1 / 0.008594701
## acc         svmR   0.9505263 / 0.9736842 / 0.05940941
## acc         logs   1 / 1 / 0
## acc         rf     1 / 1 / 0
## auc         trees  0.9933754 / 1 / 0.01277733
## auc         svm    0.9969188 / 1 / 0.008436307
## auc         svmR   0.9448179 / 0.9705882 / 0.06639741
## auc         logs   1 / 1 / 0
## auc         rf     1 / 1 / 0
## miscl       trees  0.25 / 0 / 0.4793725
## miscl       svm    0.12 / 0 / 0.3265986
## miscl       svmR   1.88 / 1 / 2.257557
## miscl       logs   0 / 0 / 0
## miscl       rf     0 / 0 / 0
## sensitivity trees  0.9929412 / 1 / 0.01921168
## sensitivity svm    0.9976471 / 1 / 0.01158508
## sensitivity svmR   0.8905882 / 0.9411765 / 0.1329921
## sensitivity logs   1 / 1 / 0
## sensitivity rf     1 / 1 / 0
## specificity trees  0.9938095 / 1 / 0.01609513
## specificity svm    0.9961905 / 1 / 0.01298381
## specificity svmR   0.9990476 / 1 / 0.006700252
## specificity logs   1 / 1 / 0
## specificity rf     1 / 1 / 0
## fneg        trees  0.12 / 0 / 0.3265986
## fneg        svm    0.04 / 0 / 0.1969464
## fneg        svmR   1.86 / 1 / 2.260866
## fneg        logs   0 / 0 / 0
## fneg        rf     0 / 0 / 0
# EN gene set, performance read from the *_enplus objects (no _train suffix).
# Row k of every *_enplus object is read as one classifier:
# 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# hist(<vector>) can be run on any of these vectors to inspect it.
# acc
acc_trees <- acc_enplus[1, ]
acc_svm <- acc_enplus[2, ]
acc_svmR <- acc_enplus[3, ]
acc_logs <- acc_enplus[4, ]
acc_rf <- acc_enplus[5, ]
# auc
auc_trees <- auc_enplus[1, ]
auc_svm <- auc_enplus[2, ]
auc_svmR <- auc_enplus[3, ]
auc_logs <- auc_enplus[4, ]
auc_rf <- auc_enplus[5, ]
# miscl
miscl_trees <- miscl_enplus[1, ]
miscl_svm <- miscl_enplus[2, ]
miscl_svmR <- miscl_enplus[3, ]
miscl_logs <- miscl_enplus[4, ]
miscl_rf <- miscl_enplus[5, ]
# sensitivity
sensitivity_trees <- sensitivity_enplus[1, ]
sensitivity_svm <- sensitivity_enplus[2, ]
sensitivity_svmR <- sensitivity_enplus[3, ]
sensitivity_logs <- sensitivity_enplus[4, ]
sensitivity_rf <- sensitivity_enplus[5, ]
# specificity
specificity_trees <- specificity_enplus[1, ]
specificity_svm <- specificity_enplus[2, ]
specificity_svmR <- specificity_enplus[3, ]
specificity_logs <- specificity_enplus[4, ]
specificity_rf <- specificity_enplus[5, ]
# fneg
fneg_trees <- fneg_enplus[1, ]
fneg_svm <- fneg_enplus[2, ]
fneg_svmR <- fneg_enplus[3, ]
fneg_logs <- fneg_enplus[4, ]
fneg_rf <- fneg_enplus[5, ]
# Print mean, median and sd of every vector, metric by metric and
# classifier by classifier (same order as the assignments above)
invisible(lapply(
  list(
    acc_trees, acc_svm, acc_svmR, acc_logs, acc_rf,
    auc_trees, auc_svm, auc_svmR, auc_logs, auc_rf,
    miscl_trees, miscl_svm, miscl_svmR, miscl_logs, miscl_rf,
    sensitivity_trees, sensitivity_svm, sensitivity_svmR,
    sensitivity_logs, sensitivity_rf,
    specificity_trees, specificity_svm, specificity_svmR,
    specificity_logs, specificity_rf,
    fneg_trees, fneg_svm, fneg_svmR, fneg_logs, fneg_rf
  ),
  function(run_values) {
    print(mean(run_values))
    print(median(run_values))
    print(sd(run_values))
  }
))
## Recorded output of the original run (mean / median / sd):
## acc         trees  0.6511765 / 0.6470588 / 0.1189045
## acc         svm    0.7264706 / 0.7058824 / 0.09286714
## acc         svmR   0.7370588 / 0.7647059 / 0.08707095
## acc         logs   0.7570588 / 0.7647059 / 0.09030258
## acc         rf     0.7782353 / 0.7647059 / 0.0814289
## auc         trees  0.6635417 / 0.6527778 / 0.09839943
## auc         svm    0.7247222 / 0.7118056 / 0.09112109
## auc         svmR   0.7351389 / 0.7569444 / 0.08763573
## auc         logs   0.7547222 / 0.7569444 / 0.09145386
## auc         rf     0.769375 / 0.7569444 / 0.08473783
## miscl       trees  5.93 / 6 / 2.021376
## miscl       svm    4.65 / 5 / 1.578741
## miscl       svmR   4.47 / 4 / 1.480206
## miscl       logs   4.13 / 4 / 1.535144
## miscl       rf     3.77 / 4 / 1.384291
## sensitivity trees  0.62125 / 0.625 / 0.1753919
## sensitivity svm    0.665 / 0.625 / 0.1828161
## sensitivity svmR   0.7025 / 0.75 / 0.1711333
## sensitivity logs   0.715 / 0.75 / 0.1629123
## sensitivity rf     0.61875 / 0.625 / 0.1689007
## specificity trees  0.6777778 / 0.6666667 / 0.1719151
## specificity svm    0.7811111 / 0.7777778 / 0.1297066
## specificity svmR   0.7677778 / 0.7777778 / 0.1540046
## specificity logs   0.7944444 / 0.7777778 / 0.1334222
## specificity rf     0.92 / 0.8888889 / 0.09219443
## fneg        trees  3.03 / 3 / 1.403135
## fneg        svm    2.68 / 3 / 1.462529
## fneg        svmR   2.38 / 2 / 1.369067
## fneg        logs   2.28 / 2 / 1.303298
## fneg        rf     3.05 / 3 / 1.351206
# Build the modelling inputs for the iTwiner gene set (`top50_itw`):
#   xdata         - expression table restricted to samples with a class label
#   ydata         - class label (0 = "P", 1 = "Pm") and sample id
#   xdata_iTwiner - selected gene columns renamed Var1..VarK plus a `type`
#                   factor
# Both tables are sorted by row name before the labels are attached.
# NOTE(review): labels are read from `clinic1` / `DATASET1`; confirm these
# objects hold the dataset intended at this point of the script.
xdata <- xdataT
ydata.raw <- as.data.frame(clinic1$class)
ydata.raw$row <- DATASET1$ID
#ydata.raw$class <- c(rep(0,28),rep(1,34))
# Recode the class labels: "P" -> 0, "Pm" -> 1
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "P"] <- 0
ydata.raw$`clinic1$class`[ydata.raw$`clinic1$class` == "Pm"] <- 1
rownames(ydata.raw) <- DATASET1$ID
# Keep only the samples present in both tables
xdata <- xdata[rownames(xdata) %in% rownames(ydata.raw), ]
ydata.raw <- as.data.frame(
  ydata.raw[rownames(ydata.raw) %in% rownames(xdata), ]
)
# Same row-name ordering on both sides
xdata <- xdata[order(row.names(xdata)), ]
ydata.raw <- ydata.raw[order(row.names(ydata.raw)), ]
ydata <- as.data.frame(ydata.raw[, 1:2])
colnames(ydata) <- c("class", "id")
names <- as.vector(top50_itw)
xdata_iTwiner <- as.data.frame(xdata[, names])
# Remember the real gene names before they are replaced by generic ones
nomesgenes <- colnames(xdata_iTwiner)
# Generic names sized to the number of selected genes (previously a
# hard-coded 1:50, which failed for any other selection size)
colnames(xdata_iTwiner) <- paste0("Var", seq_len(ncol(xdata_iTwiner)))
xdata_iTwiner$type <- as.factor(ydata$class)
ydata$class <- as.numeric(ydata$class)
## [1] 1
## [1] 1011
## [1] 2
## [1] 1022
## [1] 3
## [1] 1033
## [1] 4
## [1] 1044
## [1] 5
## [1] 1055
## [1] 6
## [1] 1066
## [1] 7
## [1] 1077
## [1] 8
## [1] 1088
## [1] 9
## [1] 1099
## [1] 10
## [1] 1110
## [1] 11
## [1] 1121
## [1] 12
## [1] 1132
## [1] 13
## [1] 1143
## [1] 14
## [1] 1154
## [1] 15
## [1] 1165
## [1] 16
## [1] 1176
## [1] 17
## [1] 1187
## [1] 18
## [1] 1198
## [1] 19
## [1] 1209
## [1] 20
## [1] 1220
## [1] 21
## [1] 1231
## [1] 22
## [1] 1242
## [1] 23
## [1] 1253
## [1] 24
## [1] 1264
## [1] 25
## [1] 1275
## [1] 26
## [1] 1286
## [1] 27
## [1] 1297
## [1] 28
## [1] 1308
## [1] 29
## [1] 1319
## [1] 30
## [1] 1330
## [1] 31
## [1] 1341
## [1] 32
## [1] 1352
## [1] 33
## [1] 1363
## [1] 34
## [1] 1374
## [1] 35
## [1] 1385
## [1] 36
## [1] 1396
## [1] 37
## [1] 1407
## [1] 38
## [1] 1418
## [1] 39
## [1] 1429
## [1] 40
## [1] 1440
## [1] 41
## [1] 1451
## [1] 42
## [1] 1462
## [1] 43
## [1] 1473
## [1] 44
## [1] 1484
## [1] 45
## [1] 1495
## [1] 46
## [1] 1506
## [1] 47
## [1] 1517
## [1] 48
## [1] 1528
## [1] 49
## [1] 1539
## [1] 50
## [1] 1550
## [1] 51
## [1] 1561
## [1] 52
## [1] 1572
## [1] 53
## [1] 1583
## [1] 54
## [1] 1594
## [1] 55
## [1] 1605
## [1] 56
## [1] 1616
## [1] 57
## [1] 1627
## [1] 58
## [1] 1638
## [1] 59
## [1] 1649
## [1] 60
## [1] 1660
## [1] 61
## [1] 1671
## [1] 62
## [1] 1682
## [1] 63
## [1] 1693
## [1] 64
## [1] 1704
## [1] 65
## [1] 1715
## [1] 66
## [1] 1726
## [1] 67
## [1] 1737
## [1] 68
## [1] 1748
## [1] 69
## [1] 1759
## [1] 70
## [1] 1770
## [1] 71
## [1] 1781
## [1] 72
## [1] 1792
## [1] 73
## [1] 1803
## [1] 74
## [1] 1814
## [1] 75
## [1] 1825
## [1] 76
## [1] 1836
## [1] 77
## [1] 1847
## [1] 78
## [1] 1858
## [1] 79
## [1] 1869
## [1] 80
## [1] 1880
## [1] 81
## [1] 1891
## [1] 82
## [1] 1902
## [1] 83
## [1] 1913
## [1] 84
## [1] 1924
## [1] 85
## [1] 1935
## [1] 86
## [1] 1946
## [1] 87
## [1] 1957
## [1] 88
## [1] 1968
## [1] 89
## [1] 1979
## [1] 90
## [1] 1990
## [1] 91
## [1] 2001
## [1] 92
## [1] 2012
## [1] 93
## [1] 2023
## [1] 94
## [1] 2034
## [1] 95
## [1] 2045
## [1] 96
## [1] 2056
## [1] 97
## [1] 2067
## [1] 98
## [1] 2078
## [1] 99
## [1] 2089
## [1] 100
## [1] 2100
# iTwiner gene set (stored in the *_tcoxplus_train objects), training-set
# performance. Row k of every object is read as one classifier:
# 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# hist(<vector>) can be run on any of these vectors to inspect it.
# acc
acc_trees <- acc_tcoxplus_train[1, ]
acc_svm <- acc_tcoxplus_train[2, ]
acc_svmR <- acc_tcoxplus_train[3, ]
acc_logs <- acc_tcoxplus_train[4, ]
acc_rf <- acc_tcoxplus_train[5, ]
# auc
auc_trees <- auc_tcoxplus_train[1, ]
auc_svm <- auc_tcoxplus_train[2, ]
auc_svmR <- auc_tcoxplus_train[3, ]
auc_logs <- auc_tcoxplus_train[4, ]
auc_rf <- auc_tcoxplus_train[5, ]
# miscl
miscl_trees <- miscl_tcoxplus_train[1, ]
miscl_svm <- miscl_tcoxplus_train[2, ]
miscl_svmR <- miscl_tcoxplus_train[3, ]
miscl_logs <- miscl_tcoxplus_train[4, ]
miscl_rf <- miscl_tcoxplus_train[5, ]
# sensitivity
sensitivity_trees <- sensitivity_tcoxplus_train[1, ]
sensitivity_svm <- sensitivity_tcoxplus_train[2, ]
sensitivity_svmR <- sensitivity_tcoxplus_train[3, ]
sensitivity_logs <- sensitivity_tcoxplus_train[4, ]
sensitivity_rf <- sensitivity_tcoxplus_train[5, ]
# specificity
specificity_trees <- specificity_tcoxplus_train[1, ]
specificity_svm <- specificity_tcoxplus_train[2, ]
specificity_svmR <- specificity_tcoxplus_train[3, ]
specificity_logs <- specificity_tcoxplus_train[4, ]
specificity_rf <- specificity_tcoxplus_train[5, ]
# fneg
fneg_trees <- fneg_tcoxplus_train[1, ]
fneg_svm <- fneg_tcoxplus_train[2, ]
fneg_svmR <- fneg_tcoxplus_train[3, ]
fneg_logs <- fneg_tcoxplus_train[4, ]
fneg_rf <- fneg_tcoxplus_train[5, ]
# Print mean, median and sd of every vector, metric by metric and
# classifier by classifier (same order as the assignments above)
invisible(lapply(
  list(
    acc_trees, acc_svm, acc_svmR, acc_logs, acc_rf,
    auc_trees, auc_svm, auc_svmR, auc_logs, auc_rf,
    miscl_trees, miscl_svm, miscl_svmR, miscl_logs, miscl_rf,
    sensitivity_trees, sensitivity_svm, sensitivity_svmR,
    sensitivity_logs, sensitivity_rf,
    specificity_trees, specificity_svm, specificity_svmR,
    specificity_logs, specificity_rf,
    fneg_trees, fneg_svm, fneg_svmR, fneg_logs, fneg_rf
  ),
  function(run_values) {
    print(mean(run_values))
    print(median(run_values))
    print(sd(run_values))
  }
))
## Recorded output of the original run (mean / median / sd):
## acc         trees  0.9944737 / 1 / 0.01140351
## acc         svm    0.8486842 / 0.8421053 / 0.0351624
## acc         svmR   0.7589474 / 0.8421053 / 0.1467214
## acc         logs   1 / 1 / 0
## acc         rf     0.9978947 / 1 / 0.007175261
## auc         trees  0.9945518 / 1 / 0.01133615
## auc         svm    0.8316667 / 0.8235294 / 0.03907989
## auc         svmR   0.7307003 / 0.8235294 / 0.1640102
## auc         logs   1 / 1 / 0
## auc         rf     0.9976471 / 1 / 0.00801941
## miscl       trees  0.21 / 0 / 0.4333333
## miscl       svm    5.75 / 6 / 1.336171
## miscl       svmR   9.16 / 6 / 5.575415
## miscl       logs   0 / 0 / 0
## miscl       rf     0.08 / 0 / 0.2726599
## sensitivity trees  0.9952941 / 1 / 0.01603882
## sensitivity svm    0.67 / 0.6470588 / 0.0788292
## sensitivity svmR   0.4623529 / 0.6470588 / 0.3284235
## sensitivity logs   1 / 1 / 0
## sensitivity rf     0.9952941 / 1 / 0.01603882
## specificity trees  0.9938095 / 1 / 0.01609513
## specificity svm    0.9933333 / 1 / 0.01793274
## specificity svmR   0.9990476 / 1 / 0.00952381
## specificity logs   1 / 1 / 0
## specificity rf     1 / 1 / 0
## fneg        trees  0.08 / 0 / 0.2726599
## fneg        svm    5.61 / 6 / 1.340096
## fneg        svmR   9.14 / 6 / 5.5832
## fneg        logs   0 / 0 / 0
## fneg        rf     0.08 / 0 / 0.2726599
# iTwiner gene set, performance read from the *_tcoxplus objects (no _train
# suffix). Row k of every object is read as one classifier:
# 1 = trees, 2 = svm, 3 = svmR, 4 = logs, 5 = rf.
# hist(<vector>) can be run on any of these vectors to inspect it.
# acc
acc_trees <- acc_tcoxplus[1, ]
acc_svm <- acc_tcoxplus[2, ]
acc_svmR <- acc_tcoxplus[3, ]
acc_logs <- acc_tcoxplus[4, ]
acc_rf <- acc_tcoxplus[5, ]
# auc
auc_trees <- auc_tcoxplus[1, ]
auc_svm <- auc_tcoxplus[2, ]
auc_svmR <- auc_tcoxplus[3, ]
auc_logs <- auc_tcoxplus[4, ]
auc_rf <- auc_tcoxplus[5, ]
# miscl
miscl_trees <- miscl_tcoxplus[1, ]
miscl_svm <- miscl_tcoxplus[2, ]
miscl_svmR <- miscl_tcoxplus[3, ]
miscl_logs <- miscl_tcoxplus[4, ]
miscl_rf <- miscl_tcoxplus[5, ]
# sensitivity
sensitivity_trees <- sensitivity_tcoxplus[1, ]
sensitivity_svm <- sensitivity_tcoxplus[2, ]
sensitivity_svmR <- sensitivity_tcoxplus[3, ]
sensitivity_logs <- sensitivity_tcoxplus[4, ]
sensitivity_rf <- sensitivity_tcoxplus[5, ]
# specificity
specificity_trees <- specificity_tcoxplus[1, ]
specificity_svm <- specificity_tcoxplus[2, ]
specificity_svmR <- specificity_tcoxplus[3, ]
specificity_logs <- specificity_tcoxplus[4, ]
specificity_rf <- specificity_tcoxplus[5, ]
# fneg
fneg_trees <- fneg_tcoxplus[1, ]
fneg_svm <- fneg_tcoxplus[2, ]
fneg_svmR <- fneg_tcoxplus[3, ]
fneg_logs <- fneg_tcoxplus[4, ]
fneg_rf <- fneg_tcoxplus[5, ]
# Print mean, median and sd of every vector, metric by metric and
# classifier by classifier (same order as the assignments above)
invisible(lapply(
  list(
    acc_trees, acc_svm, acc_svmR, acc_logs, acc_rf,
    auc_trees, auc_svm, auc_svmR, auc_logs, auc_rf,
    miscl_trees, miscl_svm, miscl_svmR, miscl_logs, miscl_rf,
    sensitivity_trees, sensitivity_svm, sensitivity_svmR,
    sensitivity_logs, sensitivity_rf,
    specificity_trees, specificity_svm, specificity_svmR,
    specificity_logs, specificity_rf,
    fneg_trees, fneg_svm, fneg_svmR, fneg_logs, fneg_rf
  ),
  function(run_values) {
    print(mean(run_values))
    print(median(run_values))
    print(sd(run_values))
  }
))
## Recorded output of the original run (mean / median / sd):
## acc         trees  0.6747059 / 0.6470588 / 0.1107468
## acc         svm    0.7152941 / 0.7058824 / 0.09224216
## acc         svmR   0.6041176 / 0.5882353 / 0.1086439
## acc         logs   0.6235294 / 0.6470588 / 0.09605842
## acc         rf     0.7376471 / 0.7647059 / 0.0745098
## auc         trees  0.6727083 / 0.6458333 / 0.1080286
## auc         svm    0.7053472 / 0.6875 / 0.08440352
## auc         svmR   0.5975694 / 0.5833333 / 0.1069533
## auc         logs   0.6215278 / 0.625 / 0.09167528
## auc         rf     0.7248611 / 0.75 / 0.07620552
## miscl       trees  5.53 / 6 / 1.882696
## miscl       svm    4.84 / 5 / 1.568117
## miscl       svmR   6.73 / 7 / 1.846947
## miscl       logs   6.4 / 6 / 1.632993
## miscl       rf     4.46 / 4 / 1.266667
## sensitivity trees  0.59625 / 0.625 / 0.1870618
## sensitivity svm    0.45875 / 0.5 / 0.1569668
## sensitivity svmR   0.37375 / 0.5 / 0.2823149
## sensitivity logs   0.525 / 0.5 / 0.1820548
## sensitivity rf     0.5075 / 0.5 / 0.1419338
## specificity trees  0.7444444 / 0.7777778 / 0.1591065
## specificity svm    0.9433333 / 1 / 0.1029494
## specificity svmR   0.8088889 / 0.8888889 / 0.2136856
## specificity logs   0.7111111 / 0.6666667 / 0.1729998
## specificity rf     0.9422222 / 1 / 0.1017125
## fneg        trees  3.23 / 3 / 1.496494
## fneg        svm    4.33 / 4 / 1.255734
## fneg        svmR   5.01 / 4 / 2.258519
## fneg        logs   3.8 / 4 / 1.456438
## fneg        rf     3.94 / 4 / 1.13547
# save results
#save.image("~/results3_2010_final.RData")
# Long-format accuracy tables for DATASET3, one per classifier. Each table
# stacks three accuracy vectors (baseline `acc`, EN `acc_enplus`, iTwiner
# `acc_tcoxplus`) and has the columns:
#   acc     - accuracy values
#   group   - ordered factor: "<method>" < "EN + <method>" < "iTwiner + <method>"
#   dataset - dataset label (character)
# The HUB variant (acc_hubplus) that was commented out in the original is
# still not included.

# Stack the three accuracy vectors of one classifier into a labelled frame.
# Group labels are repeated according to each vector's own length, so the
# result no longer depends on there being exactly 100 values per variant.
make_acc_frame <- function(base, en, itw, method, dataset) {
  labels <- c(method, paste("EN +", method), paste("iTwiner +", method))
  out <- as.data.frame(c(base, en, itw))
  colnames(out) <- "acc"
  out$group <- ordered(
    rep(labels, times = c(length(base), length(en), length(itw))),
    levels = labels
  )
  out$dataset <- dataset
  out
}

# Row 1: decision trees
dt3 <- acc[1, ]
dt_en3 <- acc_enplus[1, ]
dt_iTwiner3 <- acc_tcoxplus[1, ]
acc_dt3 <- make_acc_frame(dt3, dt_en3, dt_iTwiner3, "DT", "DATASET3")
# Row 2: svmL
svmL3 <- acc[2, ]
svmL_en3 <- acc_enplus[2, ]
svmL_iTwiner3 <- acc_tcoxplus[2, ]
acc_svmL3 <- make_acc_frame(svmL3, svmL_en3, svmL_iTwiner3, "svmL", "DATASET3")
# Row 3: svmR
svmR3 <- acc[3, ]
svmR_en3 <- acc_enplus[3, ]
svmR_iTwiner3 <- acc_tcoxplus[3, ]
acc_svmR3 <- make_acc_frame(svmR3, svmR_en3, svmR_iTwiner3, "svmR", "DATASET3")
# Row 4: logistic regression
logist3 <- acc[4, ]
logist_en3 <- acc_enplus[4, ]
logist_iTwiner3 <- acc_tcoxplus[4, ]
acc_logist3 <- make_acc_frame(
  logist3, logist_en3, logist_iTwiner3, "logist", "DATASET3"
)
# Row 5: random forest
rf3 <- acc[5, ]
rf_en3 <- acc_enplus[5, ]
rf_iTwiner3 <- acc_tcoxplus[5, ]
acc_rf3 <- make_acc_frame(rf3, rf_en3, rf_iTwiner3, "rf", "DATASET3")
# Libraries
library(ggplot2)
library(dplyr)
library(forcats)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Loading required package: viridisLite
# Accuracy box plots, one per classifier, with the three datasets side by
# side. Each data_* frame stacks the per-dataset accuracy tables
# (acc_*1, acc_*2, acc_*3), which must already exist in the session.
# Decision trees
data_dt <- rbind(acc_dt1, acc_dt2, acc_dt3)
p <- ggplot(data_dt, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "DT", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# svmL
data_svmL <- rbind(acc_svmL1, acc_svmL2, acc_svmL3)
p <- ggplot(data_svmL, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "svmL", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# svmR
data_svmR <- rbind(acc_svmR1, acc_svmR2, acc_svmR3)
p <- ggplot(data_svmR, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "svmR", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Logistic regression
data_logis <- rbind(acc_logist1, acc_logist2, acc_logist3)
p <- ggplot(data_logis, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "LR", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Random forest
data_rf <- rbind(acc_rf1, acc_rf2, acc_rf3)
p <- ggplot(data_rf, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "RF", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Libraries
library(ggplot2)
library(dplyr)
library(forcats)
library(hrbrthemes)
library(viridis)
# Same accuracy box plots as before, drawn again with full-length titles for
# the decision tree, logistic and random forest panels. Each data_* frame
# stacks the per-dataset accuracy tables (acc_*1, acc_*2, acc_*3), which
# must already exist in the session.
# Decision trees
data_dt <- rbind(acc_dt1, acc_dt2, acc_dt3)
p <- ggplot(data_dt, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Decision Trees", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# svmL
data_svmL <- rbind(acc_svmL1, acc_svmL2, acc_svmL3)
p <- ggplot(data_svmL, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "svmL", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# svmR
data_svmR <- rbind(acc_svmR1, acc_svmR2, acc_svmR3)
p <- ggplot(data_svmR, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "svmR", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Logistic regression
data_logis <- rbind(acc_logist1, acc_logist2, acc_logist3)
p <- ggplot(data_logis, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Logistic", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Random forest
data_rf <- rbind(acc_rf1, acc_rf2, acc_rf3)
p <- ggplot(data_rf, aes(x = group, y = acc, fill = dataset)) +
  geom_boxplot() +
  theme_bw()
p +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Random Forest", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# All decision trees
# Compare accuracy between datasets within each method group of the decision
# tree table (unpaired, BH-adjusted p-values). No `method` is passed; the
# recorded output below reports Wilcoxon tests.
stat.test <- compare_means(
  acc ~ dataset,
  data = data_dt,
  group.by = "group",
  paired = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
  p.adjust.method = "BH"
)
stat.test
## # A tibble: 9 × 9
## group .y. group1 group2 p p.adj p.format p.signif method
## <ord> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 DT acc DATASET1 DATASET2 1.91e- 1 2.5e- 1 0.1907 ns Wilco…
## 2 DT acc DATASET1 DATASET3 4.41e- 3 6.6e- 3 0.0044 ** Wilco…
## 3 DT acc DATASET2 DATASET3 2.02e- 5 3.6e- 5 2.0e-05 **** Wilco…
## 4 EN + DT acc DATASET1 DATASET2 6.10e- 6 1.4e- 5 6.1e-06 **** Wilco…
## 5 EN + DT acc DATASET1 DATASET3 4.94e- 6 1.4e- 5 4.9e-06 **** Wilco…
## 6 EN + DT acc DATASET2 DATASET3 7.50e- 1 7.5e- 1 0.7499 ns Wilco…
## 7 iTwiner + DT acc DATASET1 DATASET2 2.72e-13 2.4e-12 2.7e-13 **** Wilco…
## 8 iTwiner + DT acc DATASET1 DATASET3 1.31e-10 5.9e-10 1.3e-10 **** Wilco…
## 9 iTwiner + DT acc DATASET2 DATASET3 6.89e- 1 7.5e- 1 0.6890 ns Wilco…
# Box plot of accuracy per method group, filled by dataset, legend title removed
ggboxplot(data_dt, x = "group", y = "acc", fill = "dataset") +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Decision Trees", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )
#p + stat_compare_means(aes(group = dataset))
#stat_compare_means(label.y = 50)
#aes(group = dataset),comparisons = my_comparisons
# DT ----
# Plain decision tree (group "DT" in the recorded output below): rows 1-100,
# 301-400 and 601-700 of data_dt.
# NOTE(review): the hard-coded ranges depend on the row order of data_dt --
# confirm they still match if data_dt is ever rebuilt differently.
dt_stats <- as.data.frame(data_dt[c(1:100, 301:400, 601:700), ])
# Statistical test: pairwise comparison of acc between datasets, BH-adjusted.
stat.test <- compare_means(
  acc ~ dataset,
  data = dt_stats,
  group.by = "group",
  paired = FALSE, # literal constant: `F` is an ordinary, reassignable variable
  p.adjust.method = "BH"
)
stat.test
## # A tibble: 3 × 9
## group .y. group1 group2 p p.adj p.format p.signif method
## <ord> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 DT acc DATASET1 DATASET2 0.191 0.19 0.1907 ns Wilcoxon
## 2 DT acc DATASET1 DATASET3 0.00441 0.0066 0.0044 ** Wilcoxon
## 3 DT acc DATASET2 DATASET3 0.0000202 0.000061 2e-05 **** Wilcoxon
# Box plot: one box per dataset, with a significance bracket for each pair.
my_comparisons <- list(
  c("DATASET1", "DATASET2"),
  c("DATASET1", "DATASET3"),
  c("DATASET2", "DATASET3")
)
ggboxplot(dt_stats, x = "dataset", y = "acc", fill = "dataset") +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Decision Trees", x = "DT", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.position = "none"
  ) +
  # NOTE(review): the brackets presumably show unadjusted p-value stars, unlike
  # the BH-adjusted p.adj column above -- confirm against the ggpubr docs.
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")
# DT + EN ----
# Elastic-net variant (group "EN + DT" in the recorded output below): rows
# 101-200, 401-500 and 701-800 of data_dt.
# NOTE(review): the hard-coded ranges depend on the row order of data_dt --
# confirm they still match if data_dt is ever rebuilt differently.
dt_en_stats <- as.data.frame(data_dt[c(101:200, 401:500, 701:800), ])
# Statistical test: pairwise comparison of acc between datasets, BH-adjusted.
stat.test <- compare_means(
  acc ~ dataset,
  data = dt_en_stats,
  group.by = "group",
  paired = FALSE, # literal constant: `F` is an ordinary, reassignable variable
  p.adjust.method = "BH"
)
stat.test
## # A tibble: 3 × 9
## group .y. group1 group2 p p.adj p.format p.signif method
## <ord> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 EN + DT acc DATASET1 DATASET2 0.00000610 0.0000091 6.1e-06 **** Wilcox…
## 2 EN + DT acc DATASET1 DATASET3 0.00000494 0.0000091 4.9e-06 **** Wilcox…
## 3 EN + DT acc DATASET2 DATASET3 0.750 0.75 0.75 ns Wilcox…
# Box plot: one box per dataset, with a significance bracket for each pair.
my_comparisons <- list(
  c("DATASET1", "DATASET2"),
  c("DATASET1", "DATASET3"),
  c("DATASET2", "DATASET3")
)
ggboxplot(dt_en_stats, x = "dataset", y = "acc", fill = "dataset") +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Decision Trees", x = "DT + EN", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.position = "none"
  ) +
  # NOTE(review): the brackets presumably show unadjusted p-value stars, unlike
  # the BH-adjusted p.adj column above -- confirm against the ggpubr docs.
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")
# DT + iTW ----
# iTwiner variant (group "iTwiner + DT" in the recorded output below): rows
# 201-300, 501-600 and 801-900 of data_dt.
# NOTE(review): the hard-coded ranges depend on the row order of data_dt --
# confirm they still match if data_dt is ever rebuilt differently.
dt_itw_stats <- as.data.frame(data_dt[c(201:300, 501:600, 801:900), ])
# Statistical test: pairwise comparison of acc between datasets, BH-adjusted.
stat.test <- compare_means(
  acc ~ dataset,
  data = dt_itw_stats,
  group.by = "group",
  paired = FALSE, # literal constant: `F` is an ordinary, reassignable variable
  p.adjust.method = "BH"
)
stat.test
## # A tibble: 3 × 9
## group .y. group1 group2 p p.adj p.format p.signif method
## <ord> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 iTwiner + DT acc DATASET1 DATASET2 2.72e-13 8.1e-13 2.7e-13 **** Wilco…
## 2 iTwiner + DT acc DATASET1 DATASET3 1.31e-10 2 e-10 1.3e-10 **** Wilco…
## 3 iTwiner + DT acc DATASET2 DATASET3 6.89e- 1 6.9e- 1 0.69 ns Wilco…
# Box plot: one box per dataset, with a significance bracket for each pair.
my_comparisons <- list(
  c("DATASET1", "DATASET2"),
  c("DATASET1", "DATASET3"),
  c("DATASET2", "DATASET3")
)
ggboxplot(dt_itw_stats, x = "dataset", y = "acc", fill = "dataset") +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "Decision Trees", x = "DT + iTW", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.position = "none"
  ) +
  # NOTE(review): the brackets presumably show unadjusted p-value stars, unlike
  # the BH-adjusted p.adj column above -- confirm against the ggpubr docs.
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")
# all svmL ----
# Pairwise comparison of acc between datasets, run separately within each
# method (`group`) of data_svmL, with BH-adjusted p-values. The recorded output
# below reports the method as Wilcoxon.
stat.test <- compare_means(
  acc ~ dataset,
  data = data_svmL,
  group.by = "group",
  paired = FALSE, # literal constant: `F` is an ordinary, reassignable variable
  p.adjust.method = "BH"
)
stat.test
## # A tibble: 9 × 9
## group .y. group1 group2 p p.adj p.format p.signif method
## <ord> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 svmL acc DATASET1 DATAS… 5.09e- 2 9.2e- 2 0.051 ns Wilco…
## 2 svmL acc DATASET1 DATAS… 3.15e- 1 4.7e- 1 0.315 ns Wilco…
## 3 svmL acc DATASET2 DATAS… 1.05e- 3 2.4e- 3 0.001 ** Wilco…
## 4 EN + svmL acc DATASET1 DATAS… 6.46e- 1 7.5e- 1 0.646 ns Wilco…
## 5 EN + svmL acc DATASET1 DATAS… 9.30e- 1 9.3e- 1 0.930 ns Wilco…
## 6 EN + svmL acc DATASET2 DATAS… 6.66e- 1 7.5e- 1 0.666 ns Wilco…
## 7 iTwiner + svmL acc DATASET1 DATAS… 1.27e- 9 5.7e- 9 1.3e-09 **** Wilco…
## 8 iTwiner + svmL acc DATASET1 DATAS… 4.70e-20 4.2e-19 < 2e-16 **** Wilco…
## 9 iTwiner + svmL acc DATASET2 DATAS… 2.82e- 6 8.5e- 6 2.8e-06 **** Wilco…
# Grouped box plot: one cluster per method, one box per dataset.
ggboxplot(data_svmL, x = "group", y = "acc", fill = "dataset") +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "svmL", x = "Method", y = "Acc") +
  theme(
    plot.title = element_text(size = 16, face = "bold.italic", hjust = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.title = element_blank()
  )
# svmR ----
# Rows 1-100, 301-400 and 601-700 of data_svmR, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the plain-svmR runs of
# each dataset -- confirm against how data_svmR is assembled.
svmR_stats <- setNames(
  as.data.frame(data_svmR[c(1:100, 301:400, 601:700), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = svmR_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 55.892, df = 2, p-value = 7.297e-13
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  svmR_stats$acc,
  svmR_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 2.075462e-02 NA
## DATASET3 6.533107e-13 6.701501e-07
# svmR + EN ----
# Rows 101-200, 401-500 and 701-800 of data_svmR, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the EN + svmR runs of
# each dataset -- confirm against how data_svmR is assembled.
svmR_en_stats <- setNames(
  as.data.frame(data_svmR[c(101:200, 401:500, 701:800), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = svmR_en_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 6.1573, df = 2, p-value = 0.04602
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  svmR_en_stats$acc,
  svmR_en_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 0.7995923 NA
## DATASET3 0.0731924 0.0605481
# svmR + iTW ----
# Rows 201-300, 501-600 and 801-900 of data_svmR, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the iTwiner + svmR runs
# of each dataset -- confirm against how data_svmR is assembled.
svmR_itw_stats <- setNames(
  as.data.frame(data_svmR[c(201:300, 501:600, 801:900), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = svmR_itw_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 124.65, df = 2, p-value < 2.2e-16
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  svmR_itw_stats$acc,
  svmR_itw_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 4.036850e-09 NA
## DATASET3 3.273268e-24 2.028999e-12
# logis ----
# Rows 1-100, 301-400 and 601-700 of data_logis, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the plain logistic runs
# of each dataset -- confirm against how data_logis is assembled.
logis_stats <- setNames(
  as.data.frame(data_logis[c(1:100, 301:400, 601:700), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = logis_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 9.591, df = 2, p-value = 0.008267
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  logis_stats$acc,
  logis_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 0.00696590 NA
## DATASET3 0.05380181 0.4193445
# logis + EN ----
# Rows 101-200, 401-500 and 701-800 of data_logis, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the EN + logistic runs
# of each dataset -- confirm against how data_logis is assembled.
logis_en_stats <- setNames(
  as.data.frame(data_logis[c(101:200, 401:500, 701:800), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = logis_en_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 12.276, df = 2, p-value = 0.002159
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  logis_en_stats$acc,
  logis_en_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 0.36443843 NA
## DATASET3 0.02405311 0.001572091
# logis + iTW ----
# Rows 201-300, 501-600 and 801-900 of data_logis, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the iTwiner + logistic
# runs of each dataset -- confirm against how data_logis is assembled.
logis_itw_stats <- setNames(
  as.data.frame(data_logis[c(201:300, 501:600, 801:900), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = logis_itw_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 61.238, df = 2, p-value = 5.039e-14
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  logis_itw_stats$acc,
  logis_itw_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 1.921927e-07 NA
## DATASET3 1.658938e-13 0.007099146
# rf ----
# Rows 1-100, 301-400 and 601-700 of data_rf, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the plain random-forest
# runs of each dataset -- confirm against how data_rf is assembled.
rf_stats <- setNames(
  as.data.frame(data_rf[c(1:100, 301:400, 601:700), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = rf_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 30.39, df = 2, p-value = 2.517e-07
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  rf_stats$acc,
  rf_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 1.943128e-02 NA
## DATASET3 1.860940e-07 0.001425355
# rf + EN ----
# Rows 101-200, 401-500 and 701-800 of data_rf, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the EN + random-forest
# runs of each dataset -- confirm against how data_rf is assembled.
rf_en_stats <- setNames(
  as.data.frame(data_rf[c(101:200, 401:500, 701:800), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = rf_en_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 10.373, df = 2, p-value = 0.005591
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  rf_en_stats$acc,
  rf_en_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 0.01629207 NA
## DATASET3 0.01073887 0.4640821
# rf + iTW ----
# Rows 201-300, 501-600 and 801-900 of data_rf, keeping columns 1 and 3 and
# renaming them to acc / group (the recorded results below show `group`
# holding DATASET1-3).
# NOTE(review): the fixed row ranges presumably select the iTwiner +
# random-forest runs of each dataset -- confirm against how data_rf is
# assembled.
rf_itw_stats <- setNames(
  as.data.frame(data_rf[c(201:300, 501:600, 801:900), c(1, 3)]),
  c("acc", "group")
)
# Omnibus Kruskal-Wallis rank sum test of acc across the `group` levels.
kruskal.test(acc ~ group, data = rf_itw_stats)
##
## Kruskal-Wallis rank sum test
##
## data: acc by group
## Kruskal-Wallis chi-squared = 146.65, df = 2, p-value < 2.2e-16
# Follow-up: pairwise Wilcoxon tests between group levels, BH-corrected for
# multiple testing.
res <- pairwise.wilcox.test(
  rf_itw_stats$acc,
  rf_itw_stats$group,
  p.adjust.method = "BH"
)
res[["p.value"]]
## DATASET1 DATASET2
## DATASET2 1.010677e-06 NA
## DATASET3 2.376360e-27 2.485967e-19
# Write the current workspace to paper_final.RData under the home directory
# ("~") so the analysis objects can be reloaded later.
save.image(file = "~/paper_final.RData")